summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author faassen <none@none> 2004-12-10 22:12:56 +0100
committer faassen <none@none> 2004-12-10 22:12:56 +0100
commit dbac1f7c6bea3befd6796cee06fee0a620a2ea33 (patch)
tree ee210b8fd3a04ce4253f3be236b9b50527297163
parent c61df7cb8eab5ca7f6467341d8ae2ed4fd585710 (diff)
download python-lxml-dbac1f7c6bea3befd6796cee06fee0a620a2ea33.tar.gz
[svn r90] Add a framework for registering nodes with a registry, for management
of deallocation. No real tests yet, but at least it all compiles. --HG-- branch : trunk
-rw-r--r-- lxmldistutils.py 59
-rw-r--r-- setup.py 19
-rw-r--r-- src/lxml/nodereg.pxd 22
-rw-r--r-- src/lxml/nodereg.pyx 112
-rw-r--r-- src/lxml/noderegtest.pyx 101
-rw-r--r-- src/lxml/tests/test_nodereg.py 17
-rw-r--r-- src/lxml/tree.pxd 101
7 files changed, 430 insertions, 1 deletions
diff --git a/lxmldistutils.py b/lxmldistutils.py
new file mode 100644
index 00000000..c9758f09
--- /dev/null
+++ b/lxmldistutils.py
@@ -0,0 +1,59 @@
+# Based on the version from Pyrex, cleaned up
+# Added feature to allow include path option to support pxd files better
+
+import distutils.command.build_ext
+import Pyrex.Compiler.Main
+from Pyrex.Compiler.Errors import PyrexError
+from distutils.dep_util import newer
+import os
+import sys
+
+def replace_suffix(path, new_suffix):
+    """Return *path* with its file extension swapped for *new_suffix*."""
+    stem, _old_suffix = os.path.splitext(path)
+    return stem + new_suffix
+
+class build_ext(distutils.command.build_ext.build_ext):
+    """``build_ext`` command that translates ``.pyx`` sources with Pyrex.
+
+    Pyrex is run from the ``swig_sources`` hook, which hands back a source
+    list in which every existing ``.pyx`` file is replaced by its generated
+    ``.c`` file.  Subclasses may override ``get_pxd_include_paths`` to tell
+    Pyrex where to look for ``.pxd`` files.
+    """
+
+    description = ("compile Pyrex scripts, then build C/C++ extensions "
+                   "(compile/link to build directory)")
+
+    def finalize_options(self):
+        """Finalize options exactly as the stock ``build_ext`` does."""
+        distutils.command.build_ext.build_ext.finalize_options(self)
+
+    def swig_sources(self, sources, extension=None):
+        """Compile the ``.pyx`` files in *sources*; return the updated list.
+
+        *sources* is the list of source file names of one extension.
+        *extension* is accepted and ignored: newer distutils releases pass
+        the Extension object as a second argument to this hook, older ones
+        pass only *sources*, so the default keeps both call styles working.
+
+        The returned list holds the generated ``.c`` files first, followed
+        by the non-Pyrex sources in their original order.
+        """
+        if not self.extensions:
+            # Nothing to translate: hand the list back unchanged instead of
+            # returning None, which a caller could not iterate over.
+            return sources
+
+        pyx_sources = [source for source in sources
+                       if source.endswith('.pyx')]
+        other_sources = [source for source in sources
+                         if not source.endswith('.pyx')]
+        c_sources = []
+
+        for pyx in pyx_sources:
+            # A listed .pyx file that is missing on disk is skipped
+            # silently (open question from the original author: should
+            # this raise instead?).
+            if os.path.exists(pyx):
+                target = replace_suffix(pyx, '.c')
+                c_sources.append(target)
+                # Regenerate only when the .pyx is newer than the .c file,
+                # or when the command was run with --force.
+                if newer(pyx, target) or self.force:
+                    self.pyrex_compile(pyx)
+        return c_sources + other_sources
+
+    def pyrex_compile(self, source):
+        """Run the Pyrex compiler on *source*.
+
+        The include path handed to Pyrex comes from
+        ``get_pxd_include_paths``.  If Pyrex reports any error the whole
+        process is terminated with exit status 1.
+        """
+        options = Pyrex.Compiler.Main.CompilationOptions(
+            show_version=0,
+            use_listing_file=0,
+            errors_to_stderr=1,
+            include_path=self.get_pxd_include_paths(),
+            c_only=1,
+            obj_only=1,
+            output_file=None)
+
+        result = Pyrex.Compiler.Main.compile(source, options)
+        if result.num_errors != 0:
+            sys.exit(1)
+
+    def get_pxd_include_paths(self):
+        """Override this to return a list of include paths for pyrex.
+        """
+        return []
diff --git a/setup.py b/setup.py
index 5aa039d4..6d844a54 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ from distutils.command.install_lib import install_lib as installcmd
from distutils.core import setup
from distutils.dist import Distribution
from distutils.extension import Extension
-from Pyrex.Distutils import build_ext
+from lxmldistutils import build_ext
# We have to snoop for file types that distutils doesn't copy correctly when
# doing a non-build-in-place.
@@ -90,6 +90,11 @@ class MyExtBuilder(build_ext):
os.path.walk(os.curdir, remove_stale_bytecode, None)
build_ext.run(self)
+ def get_pxd_include_paths(self):
+ """Return the directories Pyrex should search for lxml's .pxd files.
+ """
+ return ['src/lxml']
+
class MyLibInstaller(installcmd):
def run(self):
installcmd.run(self)
@@ -118,6 +123,18 @@ ext_modules = [
libraries=['xml2'],
extra_compile_args = ['-w']
),
+ Extension('lxml.nodereg',
+ sources=['src/lxml/nodereg.pyx'],
+ include_dirs=['/usr/include/libxml2'],
+ libraries=['xml2'],
+ extra_compile_args = ['-w']
+ ),
+ Extension('lxml.noderegtest',
+ sources=['src/lxml/noderegtest.pyx'],
+ include_dirs=['/usr/include/libxml2'],
+ libraries=['xml2'],
+ extra_compile_args = ['-w']
+ ),
]
setup(name="lxml",
diff --git a/src/lxml/nodereg.pxd b/src/lxml/nodereg.pxd
new file mode 100644
index 00000000..806d8e64
--- /dev/null
+++ b/src/lxml/nodereg.pxd
@@ -0,0 +1,22 @@
+from tree cimport xmlNode, xmlDoc
+cimport tree
+
+cdef class NodeRegistry
+cdef class NodeProxyBase
+
+# Base class for document proxies: holds the libxml2 document pointer and
+# the registry of node proxies (methods are implemented in nodereg.pyx).
+cdef class DocumentProxyBase:
+ cdef xmlDoc* _c_doc
+ cdef NodeRegistry _registry
+
+# Base class for node proxies: pairs a libxml2 node pointer with a
+# reference to a document proxy.
+cdef class NodeProxyBase:
+ cdef DocumentProxyBase _doc
+ cdef xmlNode* _c_node
+
+# Registry of node proxies; the C-level methods declared here are
+# implemented in nodereg.pyx.
+cdef class NodeRegistry:
+ # mapping of node address (<int>c_node) to proxy; nodereg.pyx creates it
+ # as a weakref.WeakValueDictionary
+ cdef object _proxies
+
+ cdef NodeProxyBase getProxy(self, xmlNode* c_node)
+ cdef void registerProxy(self, NodeProxyBase proxy)
+ cdef attemptDeallocation(self, xmlNode* c_node)
+ cdef xmlNode* getDeallocationTop(self, xmlNode* c_node)
+ cdef int canDeallocateChildren(self, xmlNode* c_node)
diff --git a/src/lxml/nodereg.pyx b/src/lxml/nodereg.pyx
new file mode 100644
index 00000000..988fc1e5
--- /dev/null
+++ b/src/lxml/nodereg.pyx
@@ -0,0 +1,112 @@
+from tree cimport xmlNode, xmlDoc
+#cimport tree
+
+import weakref
+
+#cdef class NodeRegistry
+#cdef class NodeProxyBase
+
+# Document proxy base: creates the node registry for the document and frees
+# the libxml2 document when the proxy itself is deallocated.
+cdef class DocumentProxyBase:
+ def __init__(self):
+ self._registry = NodeRegistry()
+
+ # NOTE(review): this is a Python-visible ``def`` that takes a C pointer
+ # argument; verify it is callable as intended or should be ``cdef``.
+ def getProxy(self, xmlNode* c_node):
+ return self._registry.getProxy(c_node)
+
+ # Delegates to the registry created in __init__.
+ def registerProxy(self, NodeProxyBase proxy):
+ self._registry.registerProxy(proxy)
+
+ def __dealloc__(self):
+ # if there are no more references to the document, it is safe
+ # to clean the whole thing up, as all nodes have a reference to
+ # the document
+ tree.xmlFreeDoc(self._c_doc)
+
+cdef class NodeProxyBase:
+ def __dealloc__(self):
+ # The proxy is going away: ask the document's registry to attempt
+ # deallocation of the C node (or of a node higher up in its tree).
+ self._doc._registry.attemptDeallocation(self._c_node)
+
+cdef class NodeRegistry:
+    """A registry of Python-level proxies for libxml2 nodes.
+
+    All libxml2 nodes that have a Python proxy for them are managed here.
+
+    The idea is that there can only be a single Python proxy for each
+    libxml2 node. This class tracks these proxies. Whenever a proxy
+    has no more references to it, Pyrex will call the __dealloc__ method
+    on it.
+
+    This method will then check whether the underlying libxml2 node
+    (and its subtree) can be safely garbage collected.
+
+    Garbage collection of the underlying C-level structure is only
+    safe if:
+
+    * the top of the C-level tree is not connected to anything, such
+      as being part of a larger tree.
+
+    * there is no node proxy pointing to any part of the tree.
+
+    The proxies themselves need to be weak-referenceable, as the
+    mapping in the registry will have to consist of weak references.
+    This way, a node being registered in the registry does not count
+    as something that stops the node from being deallocated.
+
+    Proxies are keyed by the address of their C node (``<int>c_node``).
+    """
+    def __init__(self):
+        self._proxies = weakref.WeakValueDictionary()
+
+    cdef NodeProxyBase getProxy(self, xmlNode* c_node):
+        """Given an xmlNode, return node proxy, or None if no proxy yet.
+        """
+        return self._proxies.get(<int>c_node, None)
+
+    cdef void registerProxy(self, NodeProxyBase proxy):
+        """Register a proxy with the registry.
+
+        Each C node may have at most one registered proxy.
+        """
+        cdef xmlNode* c_node
+        c_node = proxy._c_node
+        # has_key() has to be *called*; subscripting the bound method
+        # raises TypeError instead of performing the lookup.
+        assert not self._proxies.has_key(<int>c_node)
+        self._proxies[<int>c_node] = proxy
+
+    cdef attemptDeallocation(self, xmlNode* c_node):
+        """Attempt deallocation of c_node (or higher up in tree).
+
+        Frees the tree containing c_node when getDeallocationTop says that
+        is safe; otherwise does nothing.
+        """
+        cdef xmlNode* c_top
+        c_top = self.getDeallocationTop(c_node)
+        if c_top is not NULL:
+            tree.xmlFreeNode(c_top)
+
+    cdef xmlNode* getDeallocationTop(self, xmlNode* c_node):
+        """Return the top of the tree that can be deallocated, or NULL.
+
+        NULL is returned when the tree is still attached to a document or
+        when some other node of the tree still has a registered proxy.
+        """
+        cdef xmlNode* c_top
+        # Climb to the root of the tree containing c_node.  The loop stops
+        # *on* the last node rather than one step past it, so c_top is
+        # never NULL when it is dereferenced below.
+        c_top = c_node
+        while c_top.parent is not NULL:
+            c_top = c_top.parent
+        # if we're still attached to the document, don't deallocate
+        if c_top.type == tree.XML_DOCUMENT_NODE:
+            return NULL
+        # a live proxy on the top node itself also keeps the tree alive;
+        # canDeallocateChildren only looks at descendants
+        if c_top != c_node and self._proxies.has_key(<int>c_top):
+            return NULL
+        # otherwise, see whether we have children to deallocate
+        # NOTE(review): when c_node is a descendant of c_top this assumes
+        # the dying proxy's own registry entry has already been dropped by
+        # the time __dealloc__ runs -- confirm.
+        if self.canDeallocateChildren(c_top):
+            return c_top
+        else:
+            return NULL
+
+    cdef int canDeallocateChildren(self, xmlNode* c_node):
+        """Return 1 if no descendant of c_node has a registered proxy, else 0.
+        """
+        # the current implementation is inefficient as it does a
+        # tree traversal to find out whether there are any node proxies
+        # we could improve this by a smarter datastructure
+        # XXX should handle attribute nodes and other things we don't reach
+        cdef xmlNode* c_current
+        c_current = c_node.children
+        proxies = self._proxies
+        while c_current is not NULL:
+            # look up the child being visited, not its parent c_node
+            key = <int>c_current
+            if proxies.has_key(key):
+                return 0
+            if not self.canDeallocateChildren(c_current):
+                return 0
+            c_current = c_current.next
+        # apparently we can deallocate all subnodes
+        return 1
+
diff --git a/src/lxml/noderegtest.pyx b/src/lxml/noderegtest.pyx
new file mode 100644
index 00000000..29f376e4
--- /dev/null
+++ b/src/lxml/noderegtest.pyx
@@ -0,0 +1,101 @@
+# this is a test module which sets up some concrete classes and
+# factories so it is possible to test the use of the nodereg module
+
+from tree cimport xmlNode, xmlDoc
+cimport tree
+
+# Parser entry points declared from libxml2's parser.h; both return an
+# xmlDoc*.
+cdef extern from "libxml/parser.h":
+ cdef xmlDoc* xmlParseFile(char* filename)
+ cdef xmlDoc* xmlParseDoc(char* cur)
+
+import nodereg
+cimport nodereg
+
+cdef class DocumentBase(nodereg.DocumentProxyBase):
+    """Concrete document proxy used by the registry tests."""
+    property documentElement:
+        def __get__(self):
+            # Proxy for the first element among the document's direct
+            # children, or None when there is no element child.
+            cdef xmlNode* c_child
+            c_child = self._c_doc.children
+            while c_child is not NULL and c_child.type != tree.XML_ELEMENT_NODE:
+                c_child = c_child.next
+            if c_child is NULL:
+                return None
+            return _elementFactory(self, c_child)
+
+# Python-level subclass whose only addition is a __weakref__ slot, so that
+# instances can be weakly referenced.
+class Document(DocumentBase):
+ __slots__ = ['__weakref__']
+
+cdef DocumentBase _documentFactory(xmlDoc* c_doc):
+    """Wrap the libxml2 document *c_doc* in a new Document proxy."""
+    cdef DocumentBase result
+    result = Document()
+    result._c_doc = c_doc
+    return result
+
+cdef object _nodeFactory(DocumentBase doc, xmlNode* c_node):
+    """Return the Python object standing for *c_node*.
+
+    Element nodes get an element proxy, the document node maps to *doc*
+    itself, and NULL or any other node type yields None.
+    """
+    if c_node is NULL:
+        return None
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        return _elementFactory(doc, c_node)
+    if c_node.type == tree.XML_DOCUMENT_NODE:
+        return doc
+    return None
+
+# Node proxy exposing DOM-style navigation properties.  Each property hands
+# the corresponding libxml2 pointer to _nodeFactory, which returns a proxy,
+# the document, or None.
+cdef class Node(nodereg.NodeProxyBase):
+
+ property parentNode:
+ def __get__(self):
+ return _nodeFactory(self._doc, self._c_node.parent)
+
+ property firstChild:
+ def __get__(self):
+ return _nodeFactory(self._doc, self._c_node.children)
+
+ property lastChild:
+ def __get__(self):
+ return _nodeFactory(self._doc, self._c_node.last)
+
+ property previousSibling:
+ def __get__(self):
+ return _nodeFactory(self._doc, self._c_node.prev)
+
+ property nextSibling:
+ def __get__(self):
+ return _nodeFactory(self._doc, self._c_node.next)
+
+cdef class ElementBase(Node):
+    """Element proxy exposing the node's name parts as unicode strings."""
+    property nodeName:
+        def __get__(self):
+            # "prefix:localName" when the element has a namespace prefix,
+            # otherwise just the local name.
+            prefix = self.prefix
+            if prefix is not None:
+                return prefix + ':' + self.localName
+            return self.localName
+
+    property localName:
+        def __get__(self):
+            # the C-level name is decoded as UTF-8
+            return unicode(self._c_node.name, 'UTF-8')
+
+    property prefix:
+        def __get__(self):
+            # None when the node has no namespace or the namespace has no
+            # prefix.
+            if self._c_node.ns is not NULL and self._c_node.ns.prefix is not NULL:
+                return unicode(self._c_node.ns.prefix, 'UTF-8')
+            return None
+
+# Python-level subclass whose only addition is a __weakref__ slot; the node
+# registry stores proxies in a WeakValueDictionary, which requires
+# weak-referenceable values.
+class Element(ElementBase):
+ __slots__ = ['__weakref__']
+
+cdef ElementBase _elementFactory(DocumentBase doc, xmlNode* c_node):
+    """Return the element proxy for *c_node*, creating it on first use.
+
+    A proxy already registered for the node is reused; otherwise a new
+    Element is created, bound to *doc* and *c_node*, and registered.
+    """
+    cdef ElementBase element
+    element = doc._registry.getProxy(c_node)
+    if element is None:
+        element = Element()
+        element._doc = doc
+        element._c_node = c_node
+        doc._registry.registerProxy(element)
+    return element
+
+def makeDocument(text):
+    """Construct a document from some xml.
+
+    *text* is the XML source as a byte string.  Raises ValueError when
+    libxml2 cannot parse it.
+    """
+    cdef xmlDoc* c_doc
+    c_doc = xmlParseDoc(text)
+    if c_doc is NULL:
+        # xmlParseDoc hands back NULL on failure; wrapping that would give
+        # a document proxy that crashes as soon as it is used.
+        raise ValueError("could not parse XML document")
+    return _documentFactory(c_doc)
diff --git a/src/lxml/tests/test_nodereg.py b/src/lxml/tests/test_nodereg.py
new file mode 100644
index 00000000..0c4db384
--- /dev/null
+++ b/src/lxml/tests/test_nodereg.py
@@ -0,0 +1,17 @@
+import unittest
+
+#from lxml.nodereg import Element, ElementTree, SubElement, XML
+from lxml import noderegtest
+
+class NodeRegTestCase(unittest.TestCase):
+ def test_foo(self):
+ # Smoke test only: checks that a small document can be constructed;
+ # nothing about the result is asserted yet.
+ doc = noderegtest.makeDocument('<foo><bar/></foo>')
+
+
+def test_suite():
+    """Return a TestSuite holding every NodeRegTestCase test."""
+    return unittest.TestSuite([unittest.makeSuite(NodeRegTestCase)])
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/lxml/tree.pxd b/src/lxml/tree.pxd
new file mode 100644
index 00000000..627c7fd1
--- /dev/null
+++ b/src/lxml/tree.pxd
@@ -0,0 +1,101 @@
+# Pyrex declarations for the parts of libxml2's tree.h used by the lxml
+# modules that cimport this file.
+cdef extern from "libxml/tree.h":
+
+ # node type codes, as read from the ``type`` field of the structs below
+ ctypedef enum xmlElementType:
+ XML_ELEMENT_NODE= 1
+ XML_ATTRIBUTE_NODE= 2
+ XML_TEXT_NODE= 3
+ XML_CDATA_SECTION_NODE= 4
+ XML_ENTITY_REF_NODE= 5
+ XML_ENTITY_NODE= 6
+ XML_PI_NODE= 7
+ XML_COMMENT_NODE= 8
+ XML_DOCUMENT_NODE= 9
+ XML_DOCUMENT_TYPE_NODE= 10
+ XML_DOCUMENT_FRAG_NODE= 11
+ XML_NOTATION_NODE= 12
+ XML_HTML_DOCUMENT_NODE= 13
+ XML_DTD_NODE= 14
+ XML_ELEMENT_DECL= 15
+ XML_ATTRIBUTE_DECL= 16
+ XML_ENTITY_DECL= 17
+ XML_NAMESPACE_DECL= 18
+ XML_XINCLUDE_START= 19
+ XML_XINCLUDE_END= 20
+
+ # forward declarations; both structs are spelled out further down
+ ctypedef struct xmlDoc
+ ctypedef struct xmlAttr
+
+ ctypedef struct xmlNs:
+ char* href
+ char* prefix
+
+ # NOTE(review): the struct declarations here presumably list only the
+ # fields accessed from Pyrex, with the real layout coming from the C
+ # header -- confirm against the Pyrex documentation.
+ ctypedef struct xmlNode:
+ xmlElementType type
+ char *name
+ xmlNode *children
+ xmlNode *last
+ xmlNode *parent
+ xmlNode *next
+ xmlNode *prev
+ xmlDoc *doc
+ char *content
+ xmlAttr* properties
+ xmlNs* ns
+
+ ctypedef struct xmlDoc:
+ xmlElementType type
+ char *name
+ xmlNode *children
+ xmlNode *last
+ xmlNode *parent
+ xmlNode *next
+ xmlNode *prev
+ xmlDoc *doc
+
+ ctypedef struct xmlAttr:
+ xmlElementType type
+ char* name
+ xmlNode* children
+ xmlNode* last
+ xmlNode* parent
+ xmlNode* next
+ xmlNode* prev
+ xmlDoc* doc
+ xmlNs* ns
+
+ ctypedef struct xmlElement:
+ xmlElementType type
+ char* name
+ xmlNode* children
+ xmlNode* last
+ xmlNode* parent
+ xmlNode* next
+ xmlNode* prev
+ xmlDoc* doc
+
+ # deallocation functions
+ cdef void xmlFreeDoc(xmlDoc *cur)
+ cdef void xmlFreeNode(xmlNode* cur)
+ cdef void xmlFree(char* buf)
+
+ # construction, attribute access, serialization and tree manipulation
+ cdef xmlNode* xmlNewNode(xmlNs* ns, char* name)
+ cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur)
+ cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns,
+ char* name, char* content)
+ cdef xmlDoc* xmlNewDoc(char* version)
+ cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value)
+ cdef char* xmlGetNoNsProp(xmlNode* node, char* name)
+ cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace)
+ cdef void xmlSetProp(xmlNode* node, char* name, char* value)
+ cdef void xmlDocDumpMemory(xmlDoc* cur,
+ char** mem,
+ int* size)
+ cdef void xmlUnlinkNode(xmlNode* cur)
+ cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root)
+ cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc)
+ cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc)
+ cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
+ cdef xmlAttr* xmlHasProp(xmlNode* node, char* name)
+ cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace)
+ cdef char* xmlNodeGetContent(xmlNode* cur)
+ cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace)
+ cdef int xmlIsBlankNode(xmlNode* node)