summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2019-02-05 21:31:02 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2019-02-05 21:31:02 +0100
commit: 201b712edf0478e6a94ace984c1e8435bf3bc3c3 (patch)
tree: 1ebf9a07fd10d3f17aa423d11294bdb58b7627f0 /src
parent: 7303cadd01b81fceb40f74148a5b9b6178936768 (diff)
download: python-lxml-201b712edf0478e6a94ace984c1e8435bf3bc3c3.tar.gz
LP#1814522: Fix a crash when appending a child subtree that contains unsubstituted entity references.
This is a work-around for a (supposed) bug in libxml2 (https://gitlab.gnome.org/GNOME/libxml2/issues/42), which crashes by running into an infinite recursion while traversing the child nodes of the entity reference. A lucky side effect is that the cleanup traversal, which was previously done twice (once to update the .doc pointers in libxml2 and once to update the dict names in lxml), is now replaced by a single traversal, which should speed things up for large subtrees.
Diffstat (limited to 'src')
-rw-r--r-- src/lxml/apihelpers.pxi 21
-rw-r--r-- src/lxml/includes/tree.pxd 2
-rw-r--r-- src/lxml/proxy.pxi 14
-rw-r--r-- src/lxml/tests/test_etree.py 18
4 files changed, 53 insertions, 2 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 5366fcaf..bccf5fbb 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -1267,6 +1267,21 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node,
return 0
+
+cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1:
+ """Simple version of 'xmlAddChild()' that does not deep-fix the document links.
+ """
+ assert _isElement(c_node)
+ c_node.parent = c_parent
+ if c_parent.children is NULL:
+ c_parent.children = c_parent.last = c_node
+ else:
+ c_node.prev = c_parent.last
+ c_parent.last.next = c_node
+ c_parent.last = c_node
+ return 0
+
+
cdef int _appendChild(_Element parent, _Element child) except -1:
u"""Append a new child to a parent element.
"""
@@ -1279,7 +1294,8 @@ cdef int _appendChild(_Element parent, _Element child) except -1:
c_next = c_node.next
# move node itself
tree.xmlUnlinkNode(c_node)
- tree.xmlAddChild(parent._c_node, c_node)
+ # do not call xmlAddChild() here since it would deep-traverse the tree
+ _linkChild(parent._c_node, c_node)
_moveTail(c_next, c_node)
# uh oh, elements may be pointing to different doc when
# parent element has moved; change them too..
@@ -1300,7 +1316,8 @@ cdef int _prependChild(_Element parent, _Element child) except -1:
c_child = _findChildForwards(parent._c_node, 0)
if c_child is NULL:
tree.xmlUnlinkNode(c_node)
- tree.xmlAddChild(parent._c_node, c_node)
+ # do not call xmlAddChild() here since it would deep-traverse the tree
+ _linkChild(parent._c_node, c_node)
else:
tree.xmlAddPrevSibling(c_child, c_node)
_moveTail(c_next, c_node)
diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd
index 0d9d8843..fb47473c 100644
--- a/src/lxml/includes/tree.pxd
+++ b/src/lxml/includes/tree.pxd
@@ -286,6 +286,7 @@ cdef extern from "libxml/tree.h":
xmlAttr* prev
xmlDoc* doc
xmlNs* ns
+ xmlAttributeType atype
ctypedef struct xmlID:
const_xmlChar* value
@@ -334,6 +335,7 @@ cdef extern from "libxml/tree.h":
cdef xmlAttr* xmlSetProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil
cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns,
const_xmlChar* name, const_xmlChar* value) nogil
+ cdef int xmlRemoveID(xmlDoc* doc, xmlAttr* cur) nogil
cdef int xmlRemoveProp(xmlAttr* cur) nogil
cdef xmlChar* xmlGetNodePath(xmlNode* node) nogil
cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil
diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi
index 2b948f26..bc803c22 100644
--- a/src/lxml/proxy.pxi
+++ b/src/lxml/proxy.pxi
@@ -324,6 +324,8 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
"""
cdef xmlNode* c_start_node
cdef xmlNode* c_node
+ cdef xmlDoc* c_doc = doc._c_doc
+ cdef tree.xmlAttr* c_attr
cdef char* c_name
cdef _nscache c_ns_cache = [NULL, 0, 0]
cdef xmlNs* c_ns
@@ -339,6 +341,9 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
c_start_node = c_element
tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
+ # 0) set C doc link
+ c_element.doc = c_doc
+
if tree._isElementOrXInclude(c_element):
if hasProxy(c_element):
proxy_count += 1
@@ -387,6 +392,15 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
c_node = <xmlNode*>c_element.properties
else:
c_node = c_node.next
+
+ if c_node:
+ # set C doc link also for properties
+ c_node.doc = c_doc
+ # remove attribute from ID table (see xmlSetTreeDoc() in libxml2's tree.c)
+ c_attr = <tree.xmlAttr*>c_node
+ if c_attr.atype == tree.XML_ATTRIBUTE_ID:
+ tree.xmlRemoveID(c_source_doc, c_attr)
+
tree.END_FOR_EACH_FROM(c_element)
# free now unused namespace declarations
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index bfb438e2..e2670ab7 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1555,6 +1555,24 @@ class ETreeOnlyTestCase(HelperTestCase):
self.assertEqual(_bytes('<root>&test;</root>'),
tostring(root))
+ def test_entity_append_parsed(self):
+ Entity = self.etree.Entity
+ Element = self.etree.Element
+ parser = self.etree.XMLParser(resolve_entities=False)
+ entity = self.etree.XML('''<!DOCTYPE data [
+ <!ENTITY a "a">
+ <!ENTITY b "&a;">
+ ]>
+ <data>&b;</data>
+ ''', parser)
+
+ el = Element('test')
+ el.append(entity)
+ self.assertEqual(el.tag, 'test')
+ self.assertEqual(el[0].tag, 'data')
+ self.assertEqual(el[0][0].tag, Entity)
+ self.assertEqual(el[0][0].name, 'b')
+
def test_entity_values(self):
Entity = self.etree.Entity
self.assertEqual(Entity("test").text, '&test;')