# -*- coding: utf-8 -*-
"""
Test cases related to SAX I/O
"""
from __future__ import absolute_import
import unittest
from xml.dom import pulldom
from xml.sax.handler import ContentHandler
from .common_imports import HelperTestCase, make_doctest, BytesIO, _bytes
from lxml import sax
class ETreeSaxTestCase(HelperTestCase):
def test_etree_sax_simple(self):
tree = self.parse('abba')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('abba'),
xml_out)
def test_etree_sax_double(self):
tree = self.parse('abbbba')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('abbbba'),
xml_out)
def test_etree_sax_comment(self):
tree = self.parse('abba')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('abba'),
xml_out)
def test_etree_sax_pi(self):
tree = self.parse('abba')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('abba'),
xml_out)
def test_etree_sax_comment_root(self):
tree = self.parse('ab')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('ab'),
xml_out)
def test_etree_sax_pi_root(self):
tree = self.parse('ab')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('ab'),
xml_out)
def test_etree_sax_attributes(self):
tree = self.parse('abba')
xml_out = self._saxify_serialize(tree)
self.assertEqual(_bytes('abba'),
xml_out)
def test_etree_sax_ns1(self):
tree = self.parse('abbbba')
new_tree = self._saxify_unsaxify(tree)
root = new_tree.getroot()
self.assertEqual('{bla}a',
root.tag)
self.assertEqual('{bla}b',
root[0].tag)
def test_etree_sax_ns2(self):
tree = self.parse('abbbba')
new_tree = self._saxify_unsaxify(tree)
root = new_tree.getroot()
self.assertEqual('{blaA}a',
root.tag)
self.assertEqual('{blaB}b',
root[0].tag)
def test_sax_to_pulldom(self):
tree = self.parse('abbbba')
handler = pulldom.SAX2DOM()
sax.saxify(tree, handler)
dom = handler.document
self.assertEqual('a',
dom.firstChild.localName)
self.assertEqual('blaA',
dom.firstChild.namespaceURI)
self.assertEqual(None,
dom.firstChild.prefix)
children = dom.firstChild.childNodes
self.assertEqual('ab',
children[0].nodeValue)
self.assertEqual('blaB',
children[1].namespaceURI)
self.assertEqual('ba',
children[2].nodeValue)
def test_sax_to_pulldom_multiple_namespaces(self):
tree = self.parse('')
handler = pulldom.SAX2DOM()
sax.saxify(tree, handler)
dom = handler.document
# With multiple prefix definitions, the node should keep the one
# that was actually used, even if the others also are valid.
self.assertEqual('a',
dom.firstChild.localName)
self.assertEqual('blaA',
dom.firstChild.namespaceURI)
self.assertEqual(None,
dom.firstChild.prefix)
tree = self.parse('')
handler = pulldom.SAX2DOM()
sax.saxify(tree, handler)
dom = handler.document
self.assertEqual('a',
dom.firstChild.localName)
self.assertEqual('blaA',
dom.firstChild.namespaceURI)
self.assertEqual('a',
dom.firstChild.prefix)
def test_element_sax(self):
tree = self.parse('')
a = tree.getroot()
b = a[0]
xml_out = self._saxify_serialize(a)
self.assertEqual(_bytes(''),
xml_out)
xml_out = self._saxify_serialize(b)
self.assertEqual(_bytes(''),
xml_out)
def test_element_sax_ns(self):
tree = self.parse('')
a = tree.getroot()
b = a[0]
new_tree = self._saxify_unsaxify(a)
root = new_tree.getroot()
self.assertEqual('{blaA}a',
root.tag)
self.assertEqual('b',
root[0].tag)
new_tree = self._saxify_unsaxify(b)
root = new_tree.getroot()
self.assertEqual('b',
root.tag)
self.assertEqual(0,
len(root))
def test_etree_sax_handler_default_ns(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startPrefixMapping(None, 'blaA')
handler.startElementNS(('blaA', 'a'), 'a', {})
handler.startPrefixMapping(None, 'blaB')
handler.startElementNS(('blaB', 'b'), 'b', {})
handler.endElementNS( ('blaB', 'b'), 'b')
handler.endPrefixMapping(None)
handler.startElementNS(('blaA', 'c'), 'c', {})
handler.endElementNS( ('blaA', 'c'), 'c')
handler.endElementNS( ('blaA', 'a'), 'a')
handler.endPrefixMapping(None)
handler.endDocument()
new_tree = handler.etree
root = new_tree.getroot()
self.assertEqual('{blaA}a',
root.tag)
self.assertEqual('{blaB}b',
root[0].tag)
self.assertEqual('{blaA}c',
root[1].tag)
def test_etree_sax_handler_default_ns_None(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startPrefixMapping(None, 'blaA')
handler.startElementNS((None, 'a'), 'a', {})
handler.startPrefixMapping(None, 'blaB')
handler.startElementNS((None, 'b'), 'b', {})
handler.endElementNS( (None, 'b'), 'b')
handler.endPrefixMapping(None)
handler.startElementNS((None, 'c'), 'c', {})
handler.endElementNS( (None, 'c'), 'c')
handler.endElementNS( (None, 'a'), 'a')
handler.endPrefixMapping(None)
handler.endDocument()
new_tree = handler.etree
root = new_tree.getroot()
self.assertEqual('{blaA}a',
root.tag)
self.assertEqual('{blaB}b',
root[0].tag)
self.assertEqual('{blaA}c',
root[1].tag)
def test_etree_sax_redefine_ns(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startPrefixMapping('ns', 'blaA')
handler.startElementNS(('blaA', 'a'), 'ns:a', {})
handler.startPrefixMapping('ns', 'blaB')
handler.startElementNS(('blaB', 'b'), 'ns:b', {})
handler.endElementNS( ('blaB', 'b'), 'ns:b')
handler.endPrefixMapping('ns')
handler.startElementNS(('blaA', 'c'), 'ns:c', {})
handler.endElementNS( ('blaA', 'c'), 'ns:c')
handler.endElementNS( ('blaA', 'a'), 'ns:a')
handler.endPrefixMapping('ns')
handler.endDocument()
new_tree = handler.etree
root = new_tree.getroot()
self.assertEqual('{blaA}a',
root.tag)
self.assertEqual('{blaB}b',
root[0].tag)
self.assertEqual('{blaA}c',
root[1].tag)
def test_etree_sax_no_ns(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startElement('a', {})
handler.startElement('b', {})
handler.endElement('b')
handler.startElement('c') # with empty attributes
handler.endElement('c')
handler.endElement('a')
handler.endDocument()
new_tree = handler.etree
root = new_tree.getroot()
self.assertEqual('a', root.tag)
self.assertEqual('b', root[0].tag)
self.assertEqual('c', root[1].tag)
def test_etree_sax_no_ns_attributes(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startElement('a', {"attr_a1": "a1"})
handler.startElement('b', {"attr_b1": "b1"})
handler.endElement('b')
handler.endElement('a')
handler.endDocument()
new_tree = handler.etree
root = new_tree.getroot()
self.assertEqual('a', root.tag)
self.assertEqual('b', root[0].tag)
self.assertEqual('a1', root.attrib["attr_a1"])
self.assertEqual('b1', root[0].attrib["attr_b1"])
def test_etree_sax_ns_attributes(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
self.assertRaises(ValueError,
handler.startElement,
'a', {"blaA:attr_a1": "a1"}
)
def test_etree_sax_error(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startElement('a')
self.assertRaises(sax.SaxError, handler.endElement, 'b')
def test_etree_sax_error2(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
handler.startElement('a')
handler.startElement('b')
self.assertRaises(sax.SaxError, handler.endElement, 'a')
def _saxify_unsaxify(self, saxifiable):
handler = sax.ElementTreeContentHandler()
sax.ElementTreeProducer(saxifiable, handler).saxify()
return handler.etree
def _saxify_serialize(self, tree):
new_tree = self._saxify_unsaxify(tree)
f = BytesIO()
new_tree.write(f)
return f.getvalue().replace(_bytes('\n'), _bytes(''))
class SimpleContentHandler(ContentHandler, object):
"""A SAX content handler that just stores the events"""
def __init__(self):
self.sax_events = []
super(SimpleContentHandler, self).__init__()
def startDocument(self):
self.sax_events.append(('startDocument',))
def endDocument(self):
self.sax_events.append(('endDocument',))
def startPrefixMapping(self, prefix, uri):
self.sax_events.append(('startPrefixMapping', prefix, uri))
def endPrefixMapping(self, prefix):
self.sax_events.append(('endPrefixMapping', prefix))
def startElement(self, name, attrs):
self.sax_events.append(('startElement', name, dict(attrs)))
def endElement(self, name):
self.sax_events.append(('endElement', name))
def startElementNS(self, name, qname, attrs):
self.sax_events.append(('startElementNS', name, qname, attrs._qnames))
def endElementNS(self, name, qname):
self.sax_events.append(('endElementNS', name, qname))
def characters(self, content):
self.sax_events.append(('characters', content))
def ignorableWhitespace(self, whitespace):
self.sax_events.append(('ignorableWhitespace', whitespace))
def processingInstruction(self, target, data):
self.sax_events.append(('processingInstruction', target, data))
def skippedEntity(self, name):
self.sax_events.append(('skippedEntity', name))
class NSPrefixSaxTestCase(HelperTestCase):
"""Testing that namespaces generate the right SAX events"""
def _saxify(self, tree):
handler = SimpleContentHandler()
sax.ElementTreeProducer(tree, handler).saxify()
return handler.sax_events
def test_element_sax_ns_prefix(self):
# The name of the prefix should be preserved, if the uri is unique
tree = self.parse(''
'')
a = tree.getroot()
self.assertEqual(
[('startElementNS', ('blaA', 'a'), 'a:a', {}),
('startElementNS', (None, 'd'), 'd',
{('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
('endElementNS', (None, 'd'), 'd'),
('endElementNS', ('blaA', 'a'), 'a:a'),
],
self._saxify(a)[3:7])
def test_element_sax_default_ns_prefix(self):
# Default prefixes should also not get a generated prefix
tree = self.parse('')
a = tree.getroot()
self.assertEqual(
[('startDocument',),
# NS prefix should be None:
('startPrefixMapping', None, 'blaA'),
('startElementNS', ('blaA', 'a'), 'a', {}),
# Attribute prefix should be None:
('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
('endElementNS', ('blaA', 'b'), 'b'),
('endElementNS', ('blaA', 'a'), 'a'),
# Prefix should be None again:
('endPrefixMapping', None),
('endDocument',)],
self._saxify(a))
# Except for attributes, if there is both a default namespace
# and a named namespace with the same uri
tree = self.parse(''
'')
a = tree.getroot()
self.assertEqual(
('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}),
self._saxify(a)[4])
def test_element_sax_twin_ns_prefix(self):
# Make an element with an doubly registered uri
tree = self.parse(''
'')
a = tree.getroot()
self.assertEqual(
# It should get the b prefix in this case
('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}),
self._saxify(a)[4])
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeSaxTestCase)])
suite.addTests([unittest.makeSuite(NSPrefixSaxTestCase)])
suite.addTests(
[make_doctest('../../../doc/sax.txt')])
return suite
if __name__ == '__main__':
print('to test use test.py %s' % __file__)