import sys

import py

# Directory that contains this test module.
here = py.path.local(__file__).dirpath()

# Put the parent of the test directory on the import path before importing
# ``nanosax`` (presumably that is where the module under test lives --
# confirm against the repository layout).  ``sys`` is imported directly
# rather than reached through the deprecated ``py.std`` accessor.
sys.path.append(here.dirpath().strpath)

import nanosax
class TestNanoSax(object):
    """Unit tests for ``nanosax.nsparser`` and its private helpers.

    NOTE(review): many XML literals in this class are empty strings, and
    some identical literals are asserted both to match and not to match.
    It looks like the markup inside them was lost at some point -- restore
    the literals from the original fixture before trusting these tests.
    """

    def test_parse(self):
        """Run ``parse`` over canned chunks and compare ``handler.xml``.

        The local ``tp`` subclass overrides ``_parse_into_chunks`` to return
        the chunks handed to its constructor and ``_handle_pis`` to return
        its input unchanged, so the string given to ``parse`` is irrelevant.
        """
        class tp(nanosax.nsparser):
            def __init__(self, handler, chunks):
                self.chunks = chunks
                super(tp, self).__init__(handler)

            def _parse_into_chunks(self, xml):
                # Ignore ``xml``; hand back the canned chunks.
                return self.chunks

            def _handle_pis(self, xml):
                # Pass the document through untouched.
                return xml

        # A single element holding a comment and some text.
        handler = nanosax.echohandler()
        parser = tp(handler, [
            (tp.TYPE_START, 1, 'foo bar="baz"'),
            (tp.TYPE_COMMENT, 1, 'here is the content of tag foo'),
            (tp.TYPE_TEXT, 1, 'content of tag foo'),
            (tp.TYPE_END, 1, 'foo'),
        ])
        parser.parse('')
        assert handler.xml == (''
                               ''
                               'content of tag foo'
                               '')

        # Nested elements with whitespace text and a CDATA chunk.
        handler = nanosax.echohandler()
        parser = tp(handler, [
            (tp.TYPE_START, 1, 'document'),
            (tp.TYPE_TEXT, 1, '\n '),
            (tp.TYPE_START, 2, 'node attr="value"'),
            (tp.TYPE_TEXT, 2, '\n '),
            (tp.TYPE_CDATA, 3, '\n some CDATA\n '),
            (tp.TYPE_TEXT, 5, '\n '),
            (tp.TYPE_END, 6, 'node'),
            (tp.TYPE_TEXT, 7, '\n'),
            (tp.TYPE_END, 8, 'document'),
        ])
        parser.parse('')
        assert handler.xml == ('\n'
                               ' \n'
                               ' \n'
                               ' \n'
                               '')

    def test_parse_into_chunks(self):
        """Check the ``(type, lineno, data)`` tuples from the chunker.

        Also checks that four inputs make the chunker raise
        ``nanosax.XMLError``.
        """
        handler = nanosax.nshandler()
        parser = nanosax.nsparser(handler)
        assert list(parser._parse_into_chunks(
            ''
            ''
            'content of tag foo'
            ''
        )) == [
            (parser.TYPE_START, 1, 'foo bar="baz"'),
            (parser.TYPE_COMMENT, 1, 'here is the content of tag foo'),
            (parser.TYPE_TEXT, 1, 'content of tag foo'),
            (parser.TYPE_END, 1, 'foo'),
        ]

        handler = nanosax.nshandler()
        parser = nanosax.nsparser(handler)
        chunks = list(parser._parse_into_chunks(
            '\n'
            ' \n'
            ' \n'
            ' \n'
            ''
        ))
        expected = [
            (parser.TYPE_START, 1, 'document'),
            (parser.TYPE_TEXT, 1, '\n '),
            (parser.TYPE_START, 2, 'node attr="value"'),
            (parser.TYPE_TEXT, 2, '\n '),
            (parser.TYPE_CDATA, 3, '\n some CDATA\n '),
            (parser.TYPE_TEXT, 5, '\n '),
            (parser.TYPE_END, 6, 'node'),
            (parser.TYPE_TEXT, 6, '\n'),
            (parser.TYPE_END, 7, 'document'),
        ]
        assert chunks == expected

        handler = nanosax.echohandler()
        parser = nanosax.nsparser(handler)

        def chunks_of(xml):
            # The result is wrapped in list() so it is fully consumed
            # inside the ``raises`` check, as the original expression did.
            return list(parser._parse_into_chunks(xml))

        # Callable form of ``raises``: the string-to-eval form used here
        # before is no longer accepted by current pytest releases.
        py.test.raises(nanosax.XMLError, chunks_of, "")
        py.test.raises(nanosax.XMLError, chunks_of, "")
        py.test.raises(nanosax.XMLError, chunks_of, "bar")
        py.test.raises(nanosax.XMLError, chunks_of, "")

    def test_handle_pis(self):
        """Compare the output of ``_handle_pis`` with the expected string."""
        handler = nanosax.nshandler()
        parser = nanosax.nsparser(handler)
        xml = (''
               ']>'
               ''
               '')
        result = parser._handle_pis(xml)
        assert result == (
            ''
            ''
        )

    def test_parse_start(self):
        """Check ``_parse_start`` on valid and invalid start-tag bodies.

        Valid bodies must yield a ``(name, attributes)`` pair; the invalid
        ones must raise ``nanosax.XMLError``.
        """
        handler = nanosax.nshandler()
        parser = nanosax.nsparser(handler)
        assert parser._parse_start(1, 'foo bar="baz"') == \
            ('foo', {'bar': 'baz'})
        assert parser._parse_start(1, 'foo bar="baz" xmlns="foo:"') == \
            ('foo', {'bar': 'baz', 'xmlns': 'foo:'})
        assert parser._parse_start(1, 'foo:bar bar:baz="qux"') == \
            ('foo:bar', {'bar:baz': 'qux'})
        # Whitespace variants only need to parse to something truthy.
        assert parser._parse_start(1, 'foo\n\t\tbar="baz\t\n"\t\t')

        # Callable form of ``raises`` (see test_parse_into_chunks).
        py.test.raises(nanosax.XMLError,
                       parser._parse_start, 1, 'foo$bar')
        py.test.raises(nanosax.XMLError,
                       parser._parse_start, 1, 'foo bar#baz="qux"')
        py.test.raises(nanosax.XMLError,
                       parser._parse_start, 1, 'foo baz="qux""')
        py.test.raises(nanosax.XMLError,
                       parser._parse_start, 1, 'foo bar')

    def test_regs(self):
        """Exercise the compiled regular expressions on ``nsparser``.

        NOTE(review): for ``_reg_xml_decl``, ``_reg_pi`` and ``_reg_dtd_1``
        the same empty literal is asserted both to match and not to match,
        which cannot hold at once -- the original inputs need restoring.
        """
        p = nanosax.nsparser

        # Element/attribute names, optionally namespace-prefixed.
        assert p._reg_name.match('foo')
        assert p._reg_name.match('foo:bar')
        assert p._reg_name.match('foo-bar:baz-qux')
        assert p._reg_name.match('foo123bar:baz123qux')
        assert p._reg_name.match('foo_bar')
        assert not p._reg_name.match('foo bar')
        assert not p._reg_name.match('foo$bar')

        # Start-tag bodies: a name followed by quoted attributes.
        assert p._reg_start.match('foo bar="baz"')
        assert p._reg_start.match('foo:bar xmlns="bar:" xmlns:foo="foo:"')
        assert p._reg_start.match('foo\n\t\tbar="baz\n\t\t\t\tqux"')
        assert not p._reg_start.match('foo bar')
        assert not p._reg_start.match('foo bar=""baz"')
        assert not p._reg_start.match('foo "')

        # XML declaration.
        assert p._reg_xml_decl.match(
            '')
        assert p._reg_xml_decl.match('')
        assert not p._reg_xml_decl.match('')
        assert not p._reg_xml_decl.match('')

        # Encoding pseudo-attribute; group 1 holds the encoding name.
        assert p._reg_encoding.match('encoding="UTF-8"')
        assert p._reg_encoding.match('encoding="latin-1"').group(1) == \
            'latin-1'
        assert not p._reg_encoding.match('encoding=""')
        assert not p._reg_encoding.match('hyperencoding="UTF-8"')

        # Processing instructions.
        assert p._reg_pi.match('')
        assert p._reg_pi.match('')
        assert not p._reg_pi.match('')

        # DTD patterns; ``_reg_dtd_1`` must consume the whole ``dtd`` text.
        assert p._reg_dtd_1.match('')
        dtd = (''
               ']>')
        assert p._reg_dtd_1.match(dtd).group(0) == dtd
        assert not p._reg_dtd_1.match('')
        assert not p._reg_dtd_1.match('')
        assert p._reg_dtd_2.match('')
        assert p._reg_dtd_2.match("")
        assert p._reg_dtd_2.match("")
class TestNanoSaxFunctional(object):
    """Functional tests driven by XML files in ``data/nsxml``.

    The directory is resolved relative to the module-level ``here`` path.
    """

    def test_working(self):
        """Parse each ``working_*.xml`` file and compare it with the echo.

        The file is fed through ``nsparser`` with an ``echohandler``; input
        and ``handler.xml`` are both passed through ``normalize`` before
        comparison.  The file path is the assertion message, so a failure
        names the offending fixture.
        """
        for path in here.join('data/nsxml').listdir('working_*.xml'):
            xml = path.read()
            handler = nanosax.echohandler()
            parser = nanosax.nsparser(handler)
            parser.parse(xml)
            xmlin = self.normalize(xml)
            xmlout = self.normalize(handler.xml)
            assert xmlin == xmlout, path.strpath

    def test_failing(self):
        """Each ``failing_*.xml`` file must raise ``nanosax.XMLError``."""
        for path in here.join('data/nsxml').listdir('failing_*.xml'):
            xml = path.read()
            handler = nanosax.nshandler()
            parser = nanosax.nsparser(handler)
            # Callable form of ``raises``: the string-to-eval form used
            # here before is no longer accepted by current pytest releases.
            py.test.raises(nanosax.XMLError, parser.parse, xml)

    def normalize(self, xml):
        """Return ``xml`` stripped by ``remove_cruft`` and re-serialized.

        Both sides of a comparison go through ``minidom`` so they are
        compared in the same serialized form.
        """
        from xml.dom import minidom
        xml = self.remove_cruft(xml)
        return minidom.parseString(xml).toxml()

    def remove_cruft(self, xml):
        """Remove everything the declaration, PI and DTD regexes match.

        For each regex in turn, the text of the first match is removed
        wherever it occurs in ``xml``, and this repeats until the regex no
        longer matches.  Returns the stripped string.
        """
        p = nanosax.nsparser
        for reg in [p._reg_xml_decl, p._reg_pi, p._reg_dtd_1, p._reg_dtd_2]:
            while True:
                match = reg.search(xml)
                # An empty match would leave ``xml`` unchanged after the
                # replace below, so the loop would never end; treat it
                # like "no match".
                if not match or not match.group(0):
                    break
                xml = xml.replace(match.group(0), '')
        return xml