diff options
| author | Stefan Behnel <stefan_ml@behnel.de> | 2019-02-05 21:31:02 +0100 |
|---|---|---|
| committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-02-05 21:31:02 +0100 |
| commit | 201b712edf0478e6a94ace984c1e8435bf3bc3c3 (patch) | |
| tree | 1ebf9a07fd10d3f17aa423d11294bdb58b7627f0 /src | |
| parent | 7303cadd01b81fceb40f74148a5b9b6178936768 (diff) | |
| download | python-lxml-201b712edf0478e6a94ace984c1e8435bf3bc3c3.tar.gz | |
LP#1814522: Fix a crash when appending a child subtree that contains unsubstituted entity references.
This is a work-around for a (supposed) bug in libxml2 (https://gitlab.gnome.org/GNOME/libxml2/issues/42) that makes it crash with infinite recursion while traversing the child nodes of the entity reference.
A lucky side effect is that the two separate cleanup traversals that previously a) updated the .doc pointers in libxml2 and b) updated the dict names in lxml are now replaced by a single traversal, which should speed things up for large subtrees.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lxml/apihelpers.pxi | 21 | ||||
| -rw-r--r-- | src/lxml/includes/tree.pxd | 2 | ||||
| -rw-r--r-- | src/lxml/proxy.pxi | 14 | ||||
| -rw-r--r-- | src/lxml/tests/test_etree.py | 18 |
4 files changed, 53 insertions, 2 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index 5366fcaf..bccf5fbb 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -1267,6 +1267,21 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node, return 0 + +cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1: + """Simple version of 'xmlAddChild()' that does not deep-fix the document links. + """ + assert _isElement(c_node) + c_node.parent = c_parent + if c_parent.children is NULL: + c_parent.children = c_parent.last = c_node + else: + c_node.prev = c_parent.last + c_parent.last.next = c_node + c_parent.last = c_node + return 0 + + cdef int _appendChild(_Element parent, _Element child) except -1: u"""Append a new child to a parent element. """ @@ -1279,7 +1294,8 @@ cdef int _appendChild(_Element parent, _Element child) except -1: c_next = c_node.next # move node itself tree.xmlUnlinkNode(c_node) - tree.xmlAddChild(parent._c_node, c_node) + # do not call xmlAddChild() here since it would deep-traverse the tree + _linkChild(parent._c_node, c_node) _moveTail(c_next, c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. 
@@ -1300,7 +1316,8 @@ cdef int _prependChild(_Element parent, _Element child) except -1: c_child = _findChildForwards(parent._c_node, 0) if c_child is NULL: tree.xmlUnlinkNode(c_node) - tree.xmlAddChild(parent._c_node, c_node) + # do not call xmlAddChild() here since it would deep-traverse the tree + _linkChild(parent._c_node, c_node) else: tree.xmlAddPrevSibling(c_child, c_node) _moveTail(c_next, c_node) diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd index 0d9d8843..fb47473c 100644 --- a/src/lxml/includes/tree.pxd +++ b/src/lxml/includes/tree.pxd @@ -286,6 +286,7 @@ cdef extern from "libxml/tree.h": xmlAttr* prev xmlDoc* doc xmlNs* ns + xmlAttributeType atype ctypedef struct xmlID: const_xmlChar* value @@ -334,6 +335,7 @@ cdef extern from "libxml/tree.h": cdef xmlAttr* xmlSetProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns, const_xmlChar* name, const_xmlChar* value) nogil + cdef int xmlRemoveID(xmlDoc* doc, xmlAttr* cur) nogil cdef int xmlRemoveProp(xmlAttr* cur) nogil cdef xmlChar* xmlGetNodePath(xmlNode* node) nogil cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi index 2b948f26..bc803c22 100644 --- a/src/lxml/proxy.pxi +++ b/src/lxml/proxy.pxi @@ -324,6 +324,8 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, """ cdef xmlNode* c_start_node cdef xmlNode* c_node + cdef xmlDoc* c_doc = doc._c_doc + cdef tree.xmlAttr* c_attr cdef char* c_name cdef _nscache c_ns_cache = [NULL, 0, 0] cdef xmlNs* c_ns @@ -339,6 +341,9 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, c_start_node = c_element tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) + # 0) set C doc link + c_element.doc = c_doc + if tree._isElementOrXInclude(c_element): if hasProxy(c_element): proxy_count += 1 @@ -387,6 +392,15 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, c_node = 
<xmlNode*>c_element.properties else: c_node = c_node.next + + if c_node: + # set C doc link also for properties + c_node.doc = c_doc + # remove attribute from ID table (see xmlSetTreeDoc() in libxml2's tree.c) + c_attr = <tree.xmlAttr*>c_node + if c_attr.atype == tree.XML_ATTRIBUTE_ID: + tree.xmlRemoveID(c_source_doc, c_attr) + tree.END_FOR_EACH_FROM(c_element) # free now unused namespace declarations diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index bfb438e2..e2670ab7 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -1555,6 +1555,24 @@ class ETreeOnlyTestCase(HelperTestCase): self.assertEqual(_bytes('<root>&test;</root>'), tostring(root)) + def test_entity_append_parsed(self): + Entity = self.etree.Entity + Element = self.etree.Element + parser = self.etree.XMLParser(resolve_entities=False) + entity = self.etree.XML('''<!DOCTYPE data [ + <!ENTITY a "a"> + <!ENTITY b "&a;"> + ]> + <data>&b;</data> + ''', parser) + + el = Element('test') + el.append(entity) + self.assertEqual(el.tag, 'test') + self.assertEqual(el[0].tag, 'data') + self.assertEqual(el[0][0].tag, Entity) + self.assertEqual(el[0][0].name, 'b') + def test_entity_values(self): Entity = self.etree.Entity self.assertEqual(Entity("test").text, '&test;') |
