# Proxy functions and low level node allocation stuff

# Proxies represent elements, their reference is stored in the C
# structure of the respective node to avoid multiple instantiation of
# the Python class

cdef inline _Element getProxy(xmlNode* c_node):
    u"""Get a proxy for a given node.

    Returns the _Element stored in ``c_node._private``, or None if
    ``c_node`` is NULL or has no proxy registered.
    """
    if c_node is not NULL and c_node._private is not NULL:
        return <_Element>c_node._private
    else:
        return None

cdef inline int hasProxy(xmlNode* c_node):
    u"""Return true iff ``c_node._private`` is set.  ``c_node`` must not
    be NULL.
    """
    return c_node._private is not NULL

cdef inline int _registerProxy(_Element proxy, _Document doc,
                               xmlNode* c_node) except -1:
    u"""Register a proxy and type for the node it's proxying for.

    Stores ``doc`` and ``c_node`` on the proxy and a borrowed pointer to
    the proxy in ``c_node._private``.  Asserts that no proxy is
    registered for the node yet.
    """
    assert c_node._private is NULL, u"double registering proxy!"
    proxy._doc = doc
    proxy._c_node = c_node
    c_node._private = <void*>proxy
    # additional INCREF to make sure _Document is GC-ed LAST!
    proxy._gc_doc = <python.PyObject*>doc
    python.Py_INCREF(doc)
    return 0

cdef inline int _unregisterProxy(_Element proxy) except -1:
    u"""Unregister a proxy for the node it's proxying for.

    Asserts that the node's ``_private`` pointer actually refers to
    ``proxy`` before clearing it.
    """
    cdef xmlNode* c_node
    c_node = proxy._c_node
    assert c_node._private is <void*>proxy, u"Tried to unregister unknown proxy"
    c_node._private = NULL
    return 0

cdef inline void _releaseProxy(_Element proxy):
    u"""An additional DECREF for the document.

    Releases the extra reference held in ``proxy._gc_doc`` (if any) and
    resets the pointer to NULL.
    """
    python.Py_XDECREF(proxy._gc_doc)
    proxy._gc_doc = NULL

cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc):
    u"""Replace the document reference of the proxy registered for
    ``c_node`` by ``doc``, unless it already refers to ``doc``.

    Nothing is returned.  ``c_node._private`` must refer to a proxy.
    """
    cdef _Document old_doc
    cdef _Element element = <_Element>c_node._private
    if element._doc is not doc:
        old_doc = element._doc
        element._doc = doc
        # move the extra "GC last" reference over to the new document
        python.Py_INCREF(doc)
        element._gc_doc = <python.PyObject*>doc
        python.Py_DECREF(old_doc)

################################################################################
# temporarily make a node the root node of its document

cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
    u"""Shortcut for ``_plainFakeRootDoc()`` with ``with_siblings`` set.
    """
    return _plainFakeRootDoc(c_base_doc, c_node, 1)

cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
                               bint with_siblings) except NULL:
    u"""Build a temporary document that has the given node as root node.

    Returns ``c_base_doc`` itself if ``c_node`` already is its root
    element (checked only if ``with_siblings`` is set or the node has no
    siblings).  Otherwise returns a shallow document copy whose root is
    a shallow copy of ``c_node`` that borrows the children of the
    original node.

    Note that copy and original must not be modified during its
    lifetime!!  Always call _destroyFakeDoc() after use!
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_root
    cdef xmlNode* c_new_root
    cdef xmlDoc* c_doc
    if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
        c_root = tree.xmlDocGetRootElement(c_base_doc)
        if c_root is c_node:
            # already the root node, no siblings
            return c_base_doc

    c_doc = _copyDoc(c_base_doc, 0)                     # non recursive!
    c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2)  # non recursive!
    tree.xmlDocSetRootElement(c_doc, c_new_root)
    _copyParentNamespaces(c_node, c_new_root)

    # borrow the children of the original node
    c_new_root.children = c_node.children
    c_new_root.last = c_node.last
    c_new_root.next = c_new_root.prev = NULL

    # store original node
    c_doc._private = c_node

    # divert parent pointers of children
    c_child = c_new_root.children
    while c_child is not NULL:
        c_child.parent = c_new_root
        c_child = c_child.next

    c_doc.children = c_new_root
    return c_doc

cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
    u"""Delete a temporary document created by ``_plainFakeRootDoc()``.

    Does nothing if ``c_doc`` is ``c_base_doc``.  Otherwise, gives the
    borrowed children back to the original node (stored in
    ``c_doc._private``) and frees the fake document.
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_parent
    cdef xmlNode* c_root
    if c_doc is c_base_doc:
        return
    c_root = tree.xmlDocGetRootElement(c_doc)

    # restore parent pointers of children
    c_parent = <xmlNode*>c_doc._private
    c_child = c_root.children
    while c_child is not NULL:
        c_child.parent = c_parent
        c_child = c_child.next

    # prevent recursive removal of children
    c_root.children = c_root.last = NULL
    tree.xmlFreeDoc(c_doc)

cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
    u"""Special element factory for cases where we need to create a fake
    root document, but still need to instantiate arbitrary nodes from
    it.  If we instantiate the fake root node, things will turn bad
    when it's destroyed.

    Instead, if we are asked to instantiate the fake root node, we
    instantiate the original node instead.
    """
    if c_element.doc is not doc._c_doc:
        if c_element.doc._private is not NULL:
            if c_element is c_element.doc.children:
                # fake root node => switch to the stored original node
                c_element = <xmlNode*>c_element.doc._private
    return _elementFactory(doc, c_element)

################################################################################
# support for freeing tree elements when proxy objects are destroyed

cdef int attemptDeallocation(xmlNode* c_node):
    u"""Attempt deallocation of c_node (or higher up in tree).

    Returns 1 if a subtree was freed, 0 otherwise.
    """
    cdef xmlNode* c_top
    # could be we actually aren't referring to the tree at all
    if c_node is NULL:
        return 0
    c_top = getDeallocationTop(c_node)
    if c_top is not NULL:
        _removeText(c_top.next)  # tail
        tree.xmlFreeNode(c_top)
        return 1
    return 0

cdef xmlNode* getDeallocationTop(xmlNode* c_node):
    u"""Return the top of the tree that can be deallocated, or NULL.

    NULL is returned if c_node, one of its ancestors or one of the
    element nodes below the top node still has a proxy, or if the node
    is still connected to a document node.
    """
    cdef xmlNode* c_current
    cdef xmlNode* c_top
    if c_node._private is not NULL:
        # not freeing: proxies still exist
        return NULL
    c_current = c_node.parent
    c_top = c_node
    while c_current is not NULL:
        if c_current.type == tree.XML_DOCUMENT_NODE or \
               c_current.type == tree.XML_HTML_DOCUMENT_NODE:
            # if we're still attached to the document, don't deallocate
            return NULL
        if c_current._private is not NULL:
            # not freeing: proxies still exist
            return NULL
        c_top = c_current
        c_current = c_current.parent
    # see whether we have children to deallocate
    if canDeallocateChildNodes(c_top):
        return c_top
    else:
        return NULL

cdef int canDeallocateChildNodes(xmlNode* c_parent):
    u"""Return 0 if any node visited by the element traversal macro
    below c_parent has a proxy, 1 otherwise.
    """
    cdef xmlNode* c_node
    c_node = c_parent.children
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
    if c_node._private is not NULL:
        return 0
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 1

################################################################################
# fix _Document references and namespaces when a node changes documents

cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil:
    u"""Copy the namespaces of all ancestors of c_from_node to c_to_node.

    The walk stops at the first ancestor that is neither an
    element/XInclude node nor an XML document node.
    """
    cdef xmlNode* c_parent
    cdef xmlNs* c_new_ns
    c_parent = c_from_node.parent
    while c_parent is not NULL and (tree._isElementOrXInclude(c_parent) or
                                    c_parent.type == tree.XML_DOCUMENT_NODE):
        c_new_ns = c_parent.nsDef
        while c_new_ns is not NULL:
            # libxml2 will check if the prefix is already defined
            tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
            c_new_ns = c_new_ns.next
        c_parent = c_parent.parent

# Growable mapping from old xmlNs pointers to their replacements.
# "new" and "old" are parallel arrays with "size" allocated slots each,
# of which the first "last" slots are in use.
ctypedef struct _nscache:
    xmlNs** new
    xmlNs** old
    size_t size
    size_t last

cdef int _growNsCache(_nscache* c_ns_cache) except -1:
    u"""Enlarge both arrays of the cache (20 slots initially, doubling
    afterwards).

    Raises MemoryError if either reallocation fails.  In that case, both
    arrays are freed and the cache is reset to its empty state, so that
    no dangling pointers remain in the struct.
    """
    cdef xmlNs** c_ns_ptr
    cdef size_t c_new_size
    if c_ns_cache.size == 0:
        c_new_size = 20
    else:
        c_new_size = c_ns_cache.size * 2

    c_ns_ptr = <xmlNs**> stdlib.realloc(
        c_ns_cache.new, c_new_size * sizeof(xmlNs*))
    if c_ns_ptr is not NULL:
        c_ns_cache.new = c_ns_ptr
        c_ns_ptr = <xmlNs**> stdlib.realloc(
            c_ns_cache.old, c_new_size * sizeof(xmlNs*))
        if c_ns_ptr is not NULL:
            c_ns_cache.old = c_ns_ptr
            # only commit the new size once both arrays have it
            c_ns_cache.size = c_new_size
            return 0

    # a failed realloc() leaves the original memory block allocated,
    # so both arrays are still ours to free here
    stdlib.free(c_ns_cache.new)
    stdlib.free(c_ns_cache.old)
    c_ns_cache.new = NULL
    c_ns_cache.old = NULL
    c_ns_cache.size = 0
    c_ns_cache.last = 0
    raise MemoryError()

cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                 xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
    u"""Append the mapping c_old_ns -> c_new_ns to the cache, growing it
    as needed.  Raises MemoryError if growing fails.
    """
    if c_ns_cache.last >= c_ns_cache.size:
        _growNsCache(c_ns_cache)
    c_ns_cache.old[c_ns_cache.last] = c_old_ns
    c_ns_cache.new[c_ns_cache.last] = c_new_ns
    c_ns_cache.last += 1
    return 0

cdef int _stripRedundantNamespaceDeclarations(
    xmlNode* c_element, _nscache* c_ns_cache, xmlNs** c_del_ns_list) except -1:
    u"""Removes namespace declarations from an element that are already
    defined in its parents.  Does not free the xmlNs's, just prepends
    them to the c_del_ns_list.

    Every declaration found on the element is recorded in the cache,
    mapped either to itself (kept) or to the equivalent declaration
    found via the parent (stripped).
    """
    cdef xmlNs* c_ns
    cdef xmlNs* c_ns_next
    cdef xmlNs** c_nsdef
    # use a xmlNs** to handle assignments to "c_element.nsDef" correctly
    c_nsdef = &c_element.nsDef
    while c_nsdef[0] is not NULL:
        c_ns = tree.xmlSearchNsByHref(
            c_element.doc, c_element.parent, c_nsdef[0].href)
        if c_ns is NULL:
            # new namespace href => keep and cache the ns declaration
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0])
            c_nsdef = &c_nsdef[0].next
        else:
            # known namespace href => cache mapping and strip old ns
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns)
            # cut out c_nsdef.next and prepend it to garbage chain
            c_ns_next = c_nsdef[0].next
            c_nsdef[0].next = c_del_ns_list[0]
            c_del_ns_list[0] = c_nsdef[0]
            c_nsdef[0] = c_ns_next
    return 0

cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                            xmlNode* c_element) except -1:
    u"""Fix the xmlNs pointers of a node and its subtree that were moved.

    Originally copied from libxml2's xmlReconciliateNs().  Expects
    libxml2 doc pointers of node to be correct already, but fixes
    _Document references.

    For each node in the subtree, we do this:

    1) Remove redundant declarations of namespace that are already
       defined in its parents.

    2) Replace namespaces that are *not* defined on the node or its
       parents by the equivalent namespace declarations that *are*
       defined on the node or its parents (possibly using a different
       prefix).  If a namespace is unknown, declare a new one on the
       node.

    3) Reassign the names of tags and attribute from the dict of the
       target document *iff* it is different from the dict used in the
       source subtree.

    4) Set the Document reference to the new Document (if different).
       This is done on backtracking to keep the original Document
       alive as long as possible, until all its elements are updated.

    Note that the namespace declarations are removed from the tree in
    step 1), but freed only after the complete subtree was traversed
    and all occurrences were replaced by tree-internal pointers.

    Returns 0 on success (also if c_element is not an element/XInclude
    node, in which case nothing is done).
    """
    cdef xmlNode* c_start_node
    cdef xmlNode* c_node
    cdef _nscache c_ns_cache
    cdef xmlNs* c_ns
    cdef xmlNs* c_del_ns_list
    cdef size_t i, proxy_count = 0

    if not tree._isElementOrXInclude(c_element):
        return 0

    c_start_node = c_element
    c_del_ns_list = NULL

    c_ns_cache.new = NULL
    c_ns_cache.old = NULL
    c_ns_cache.size = 0
    c_ns_cache.last = 0

    tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
    if tree._isElementOrXInclude(c_element):
        if c_element._private is not NULL:
            proxy_count += 1

        # 1) cut out namespaces defined here that are already known by
        #    the ancestors
        if c_element.nsDef is not NULL:
            _stripRedundantNamespaceDeclarations(
                c_element, &c_ns_cache, &c_del_ns_list)

        # 2) make sure the namespaces of an element and its attributes
        #    are declared in this document (i.e. on the node or its parents)
        c_node = c_element
        while c_node is not NULL:
            if c_node.ns is not NULL:
                for i from 0 <= i < c_ns_cache.last:
                    if c_node.ns is c_ns_cache.old[i]:
                        c_node.ns = c_ns_cache.new[i]
                        break
                else:
                    # not in cache => find a replacement from this document
                    c_ns = doc._findOrBuildNodeNs(
                        c_start_node, c_node.ns.href, c_node.ns.prefix,
                        c_node.type == tree.XML_ATTRIBUTE_NODE)
                    _appendToNsCache(&c_ns_cache, c_node.ns, c_ns)
                    c_node.ns = c_ns
            if c_node is c_element:
                # after the element, continue with its attributes
                c_node = <xmlNode*>c_element.properties
            else:
                c_node = c_node.next
    tree.END_FOR_EACH_FROM(c_element)

    # free now unused namespace declarations
    if c_del_ns_list is not NULL:
        tree.xmlFreeNsList(c_del_ns_list)

    # cleanup
    if c_ns_cache.new is not NULL:
        stdlib.free(c_ns_cache.new)
    if c_ns_cache.old is not NULL:
        stdlib.free(c_ns_cache.old)

    # 3) fix the names in the tree if we moved it from a different thread
    if doc._c_doc.dict is not c_source_doc.dict:
        fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)

    # 4) fix _Document references
    #    (and potentially deallocate the source document)
    if proxy_count > 0:
        if proxy_count == 1 and c_start_node._private is not NULL:
            # the only proxy is the one of the start node => no traversal
            _updateProxyDocument(c_start_node, doc)
        else:
            fixElementDocument(c_start_node, doc, proxy_count)

    return 0

cdef void fixElementDocument(xmlNode* c_element, _Document doc,
                             size_t proxy_count):
    u"""Update the document reference of the proxies found in the
    subtree of c_element, stopping as soon as ``proxy_count`` proxies
    were updated.
    """
    cdef xmlNode* c_node = c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node._private is not NULL:
        _updateProxyDocument(c_node, doc)
        proxy_count -= 1
        if proxy_count == 0:
            return
    tree.END_FOR_EACH_FROM(c_node)

cdef void fixThreadDictNames(xmlNode* c_element,
                             tree.xmlDict* c_src_dict,
                             tree.xmlDict* c_dict) nogil:
    u"""Re-assign the names of tags and attributes.

    This should only be called when the element is based on a
    different libxml2 tag name dictionary.
    """
    if c_element.type == tree.XML_DOCUMENT_NODE or \
           c_element.type == tree.XML_HTML_DOCUMENT_NODE:
        # may define "xml" namespace
        fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
        c_element = c_element.children
        while c_element is not NULL:
            fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
            c_element = c_element.next
    elif tree._isElementOrXInclude(c_element):
        fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)

cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                    tree.xmlDict* c_src_dict,
                                    tree.xmlDict* c_dict) nogil:
    u"""Fix the dict strings of all nodes in the subtree of c_element:
    node names, plus attributes and namespace declarations of
    element/XInclude nodes, plus the content of text nodes.
    """
    cdef xmlNode* c_node = c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node.name is not NULL:
        fixThreadDictNameForNode(c_node, c_src_dict, c_dict)
    if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START):
        fixThreadDictNamesForAttributes(
            c_node.properties, c_src_dict, c_dict)
        fixThreadDictNsForNode(c_node, c_src_dict, c_dict)
    elif c_node.type == tree.XML_TEXT_NODE:
        # libxml2's SAX2 parser interns some indentation space
        fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
    tree.END_FOR_EACH_FROM(c_node)

cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                 tree.xmlDict* c_src_dict,
                                                 tree.xmlDict* c_dict) nogil:
    u"""Fix the names and the value node contents of all attributes in
    the list starting at c_attr.
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_node = <xmlNode*>c_attr
    while c_node is not NULL:
        fixThreadDictNameForNode(c_node, c_src_dict, c_dict)
        # libxml2 keeps some (!) attribute values in the dict
        c_child = c_node.children
        while c_child is not NULL:
            fixThreadDictContentForNode(c_child, c_src_dict, c_dict)
            c_child = c_child.next
        c_node = c_node.next

cdef inline void fixThreadDictNameForNode(xmlNode* c_node,
                                          tree.xmlDict* c_src_dict,
                                          tree.xmlDict* c_dict) nogil:
    u"""Replace the name of c_node by the equivalent string from c_dict
    if it is owned by c_src_dict.  Text and comment nodes are skipped.
    """
    cdef char* c_name = c_node.name
    if c_name is not NULL and \
           c_node.type != tree.XML_TEXT_NODE and \
           c_node.type != tree.XML_COMMENT_NODE:
        if tree.xmlDictOwns(c_src_dict, c_node.name):
            # c_name can be NULL on memory error, but we don't handle that here
            c_name = tree.xmlDictLookup(c_dict, c_name, -1)
            if c_name is not NULL:
                c_node.name = c_name

cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                             tree.xmlDict* c_src_dict,
                                             tree.xmlDict* c_dict) nogil:
    u"""Replace the content of c_node by the equivalent string from
    c_dict if it is owned by c_src_dict.
    """
    # content that points at the node's own "properties" field is skipped
    if c_node.content is not NULL and \
           c_node.content is not <char*>&c_node.properties:
        if tree.xmlDictOwns(c_src_dict, c_node.content):
            # result can be NULL on memory error, but we don't handle that here
            c_node.content = tree.xmlDictLookup(c_dict, c_node.content, -1)

cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                        tree.xmlDict* c_src_dict,
                                        tree.xmlDict* c_dict) nogil:
    u"""Replace the href and prefix strings of all namespace
    declarations on c_node that are owned by c_src_dict by the
    equivalent strings from c_dict.
    """
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        if c_ns.href is not NULL:
            if tree.xmlDictOwns(c_src_dict, c_ns.href):
                c_ns.href = tree.xmlDictLookup(c_dict, c_ns.href, -1)
        if c_ns.prefix is not NULL:
            if tree.xmlDictOwns(c_src_dict, c_ns.prefix):
                c_ns.prefix = tree.xmlDictLookup(c_dict, c_ns.prefix, -1)
        c_ns = c_ns.next