diff options
| author | faassen <none@none> | 2004-12-10 22:12:56 +0100 |
|---|---|---|
| committer | faassen <none@none> | 2004-12-10 22:12:56 +0100 |
| commit | dbac1f7c6bea3befd6796cee06fee0a620a2ea33 (patch) | |
| tree | ee210b8fd3a04ce4253f3be236b9b50527297163 | |
| parent | c61df7cb8eab5ca7f6467341d8ae2ed4fd585710 (diff) | |
| download | python-lxml-dbac1f7c6bea3befd6796cee06fee0a620a2ea33.tar.gz | |
[svn r90] Add a framework for registering nodes with a registry, for management
of deallocation. No real tests yet, but at least it all compiles.
--HG--
branch : trunk
| -rw-r--r-- | lxmldistutils.py | 59 | ||||
| -rw-r--r-- | setup.py | 19 | ||||
| -rw-r--r-- | src/lxml/nodereg.pxd | 22 | ||||
| -rw-r--r-- | src/lxml/nodereg.pyx | 112 | ||||
| -rw-r--r-- | src/lxml/noderegtest.pyx | 101 | ||||
| -rw-r--r-- | src/lxml/tests/test_nodereg.py | 17 | ||||
| -rw-r--r-- | src/lxml/tree.pxd | 101 |
7 files changed, 430 insertions, 1 deletions
diff --git a/lxmldistutils.py b/lxmldistutils.py new file mode 100644 index 00000000..c9758f09 --- /dev/null +++ b/lxmldistutils.py @@ -0,0 +1,59 @@ +# Based on the version from Pyrex, cleaned up +# Added feature to allow include path option to support pxd files better + +import distutils.command.build_ext +import Pyrex.Compiler.Main +from Pyrex.Compiler.Errors import PyrexError +from distutils.dep_util import newer +import os +import sys + +def replace_suffix(path, new_suffix): + return os.path.splitext(path)[0] + new_suffix + +class build_ext(distutils.command.build_ext.build_ext): + + description = ("compile Pyrex scripts, then build C/C++ extensions " + "(compile/link to build directory)") + + def finalize_options(self): + distutils.command.build_ext.build_ext.finalize_options(self) + + def swig_sources(self, sources): + if not self.extensions: + return + + pyx_sources = [source for source in sources + if source.endswith('.pyx')] + other_sources = [source for source in sources + if not source.endswith('.pyx')] + c_sources = [] + + for pyx in pyx_sources: + # should I raise an exception if it doesn't exist? + if os.path.exists(pyx): + source = pyx + target = replace_suffix(source, '.c') + c_sources.append(target) + if newer(source, target) or self.force: + self.pyrex_compile(source) + return c_sources + other_sources + + def pyrex_compile(self, source): + options = Pyrex.Compiler.Main.CompilationOptions( + show_version=0, + use_listing_file=0, + errors_to_stderr=1, + include_path=self.get_pxd_include_paths(), + c_only=1, + obj_only=1, + output_file=None) + + result = Pyrex.Compiler.Main.compile(source, options) + if result.num_errors <> 0: + sys.exit(1) + + def get_pxd_include_paths(self): + """Override this to return a list of include paths for pyrex. + """ + return [] @@ -9,7 +9,7 @@ from distutils.command.install_lib import install_lib as installcmd from distutils.core import setup from distutils.dist import Distribution from distutils.extension import Extension -from Pyrex.Distutils import build_ext +from lxmldistutils import build_ext # We have to snoop for file types that distutils doesn't copy correctly when # doing a non-build-in-place. @@ -90,6 +90,11 @@ class MyExtBuilder(build_ext): os.path.walk(os.curdir, remove_stale_bytecode, None) build_ext.run(self) + def get_pxd_include_paths(self): + """lxml specific pxd paths. + """ + return ['src/lxml'] + class MyLibInstaller(installcmd): def run(self): installcmd.run(self) @@ -118,6 +123,18 @@ ext_modules = [ libraries=['xml2'], extra_compile_args = ['-w'] ), + Extension('lxml.nodereg', + sources=['src/lxml/nodereg.pyx'], + include_dirs=['/usr/include/libxml2'], + libraries=['xml2'], + extra_compile_args = ['-w'] + ), + Extension('lxml.noderegtest', + sources=['src/lxml/noderegtest.pyx'], + include_dirs=['/usr/include/libxml2'], + libraries=['xml2'], + extra_compile_args = ['-w'] + ), ] setup(name="lxml", diff --git a/src/lxml/nodereg.pxd b/src/lxml/nodereg.pxd new file mode 100644 index 00000000..806d8e64 --- /dev/null +++ b/src/lxml/nodereg.pxd @@ -0,0 +1,22 @@ +from tree cimport xmlNode, xmlDoc +cimport tree + +cdef class NodeRegistry +cdef class NodeProxyBase + +cdef class DocumentProxyBase: + cdef xmlDoc* _c_doc + cdef NodeRegistry _registry + +cdef class NodeProxyBase: + cdef DocumentProxyBase _doc + cdef xmlNode* _c_node + +cdef class NodeRegistry: + cdef object _proxies + + cdef NodeProxyBase getProxy(self, xmlNode* c_node) + cdef void registerProxy(self, NodeProxyBase proxy) + cdef attemptDeallocation(self, xmlNode* c_node) + cdef xmlNode* getDeallocationTop(self, xmlNode* c_node) + cdef int canDeallocateChildren(self, xmlNode* c_node) diff --git a/src/lxml/nodereg.pyx b/src/lxml/nodereg.pyx new file mode 100644 index 00000000..988fc1e5 --- /dev/null +++ b/src/lxml/nodereg.pyx @@ -0,0 +1,112 @@ +from tree cimport xmlNode, xmlDoc +#cimport tree + +import weakref + +#cdef class NodeRegistry +#cdef class NodeProxyBase + +cdef class DocumentProxyBase: + def __init__(self): + self._registry = NodeRegistry() + + def getProxy(self, xmlNode* c_node): + return self._registry.getProxy(c_node) + + def registerProxy(self, NodeProxyBase proxy): + self._registry.registerProxy(proxy) + + def __dealloc__(self): + # if there are no more references to the document, it is safe + # to clean the whole thing up, as all nodes have a reference to + # the document + tree.xmlFreeDoc(self._c_doc) + +cdef class NodeProxyBase: + def __dealloc__(self): + self._doc._registry.attemptDeallocation(self._c_node) + +cdef class NodeRegistry: + """A registry of Python-level proxies for libxml2 nodes. + + All libxml2 nodes that have a Python proxy for them are managed here. + + The idea is that there can only be a single Python proxy for each + libxml2 node. This class tracks these proxies. Whenever a proxy + has no more references to it, Pyrex will call the __dealloc__ method + on it. + + This method will then check whether the underlying libxml2 node + (and its subtree) can be safely garbage collected. + + Garbage collection of the underlying C-level structure is only + safe if: + + * the top of the C-level tree is not connected to anything, such + as being part of a larger tree. + + * there is no node proxy pointing to any part of the tree. + + The proxies themselves need to be weak-referenceable, as the + mapping in the registry will have to consist of weak references. + This way, a node being registered in the registry does not count + as something that stops the node from being deallocated. + """ + def __init__(self): + self._proxies = weakref.WeakValueDictionary() + + cdef NodeProxyBase getProxy(self, xmlNode* c_node): + """Given an xmlNode, return node proxy, or None if no proxy yet. + """ + return self._proxies.get(<int>c_node, None) + + cdef void registerProxy(self, NodeProxyBase proxy): + """Register a proxy with the registry. + """ + cdef xmlNode* c_node + c_node = proxy._c_node + assert not self._proxies.has_key[<int>c_node] + self._proxies[<int>c_node] = proxy + + cdef attemptDeallocation(self, xmlNode* c_node): + """Attempt deallocation of c_node (or higher up in tree). + """ + cdef xmlNode* c_top + c_top = self.getDeallocationTop(c_node) + if c_top is not NULL: + tree.xmlFreeNode(c_top) + + cdef xmlNode* getDeallocationTop(self, xmlNode* c_node): + """Return the top of the tree that can be deallocated, or NULL. + """ + cdef xmlNode* c_current + c_current = c_node.parent + while c_current is not NULL: + c_current = c_current.parent + # if we're still attached to the document, don't deallocate + if c_current.type == tree.XML_DOCUMENT_NODE: + return NULL + # otherwise, see whether we have children to deallocate + if self.canDeallocateChildren(c_current): + return c_current + else: + return NULL + + cdef int canDeallocateChildren(self, xmlNode* c_node): + # the current implementation is inefficient as it does a + # tree traversal to find out whether there are any node proxies + # we could improve this by a smarter datastructure + # XXX should handle attribute nodes and other things we don't reach + cdef xmlNode* c_current + c_current = c_node.children + proxies = self._proxies + while c_current is not NULL: + id = <int>c_node + if proxies.has_key(id): + return 0 + if not self.canDeallocateChildren(c_current): + return 0 + c_current = c_current.next + # apparently we can deallocate all subnodes + return 1 + diff --git a/src/lxml/noderegtest.pyx b/src/lxml/noderegtest.pyx new file mode 100644 index 00000000..29f376e4 --- /dev/null +++ b/src/lxml/noderegtest.pyx @@ -0,0 +1,101 @@ +# this is a test module which sets up some concrete classes and +# factories so it is possible to test the use of the nodereg module + +from tree cimport xmlNode, xmlDoc +cimport tree + +cdef extern from "libxml/parser.h": + cdef xmlDoc* xmlParseFile(char* filename) + cdef xmlDoc* xmlParseDoc(char* cur) + +import nodereg +cimport nodereg + +cdef class DocumentBase(nodereg.DocumentProxyBase): + property documentElement: + def __get__(self): + cdef xmlNode* c_node + c_node = self._c_doc.children + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE: + return _elementFactory(self, c_node) + c_node = c_node.next + return None + +class Document(DocumentBase): + __slots__ = ['__weakref__'] + +cdef DocumentBase _documentFactory(xmlDoc* c_doc): + cdef DocumentBase doc + doc = Document() + doc._c_doc = c_doc + return doc + +cdef object _nodeFactory(DocumentBase doc, xmlNode* c_node): + if c_node is NULL: + return None + elif c_node.type == tree.XML_ELEMENT_NODE: + return _elementFactory(doc, c_node) + elif c_node.type == tree.XML_DOCUMENT_NODE: + return doc + +cdef class Node(nodereg.NodeProxyBase): + + property parentNode: + def __get__(self): + return _nodeFactory(self._doc, self._c_node.parent) + + property firstChild: + def __get__(self): + return _nodeFactory(self._doc, self._c_node.children) + + property lastChild: + def __get__(self): + return _nodeFactory(self._doc, self._c_node.last) + + property previousSibling: + def __get__(self): + return _nodeFactory(self._doc, self._c_node.prev) + + property nextSibling: + def __get__(self): + return _nodeFactory(self._doc, self._c_node.next) + +cdef class ElementBase(Node): + property nodeName: + def __get__(self): + if self.prefix is None: + return self.localName + else: + return self.prefix + ':' + self.localName + + property localName: + def __get__(self): + return unicode(self._c_node.name, 'UTF-8') + + property prefix: + def __get__(self): + if self._c_node.ns is NULL or self._c_node.ns.prefix is NULL: + return None + return unicode(self._c_node.ns.prefix, 'UTF-8') + +class Element(ElementBase): + __slots__ = ['__weakref__'] + +cdef ElementBase _elementFactory(DocumentBase doc, xmlNode* c_node): + cdef ElementBase result + result = doc._registry.getProxy(c_node) + if result is not None: + return result + result = Element() + result._doc = doc + result._c_node = c_node + doc._registry.registerProxy(result) + return result + +def makeDocument(text): + """Construct a document from some xml. + """ + cdef xmlDoc* c_doc + c_doc = xmlParseDoc(text) + return _documentFactory(c_doc) diff --git a/src/lxml/tests/test_nodereg.py b/src/lxml/tests/test_nodereg.py new file mode 100644 index 00000000..0c4db384 --- /dev/null +++ b/src/lxml/tests/test_nodereg.py @@ -0,0 +1,17 @@ +import unittest + +#from lxml.nodereg import Element, ElementTree, SubElement, XML +from lxml import noderegtest + +class NodeRegTestCase(unittest.TestCase): + def test_foo(self): + doc = noderegtest.makeDocument('<foo><bar/></foo>') + + +def test_suite(): + suite = unittest.TestSuite() + suite.addTests([unittest.makeSuite(NodeRegTestCase)]) + return suite + +if __name__ == '__main__': + unittest.main() diff --git a/src/lxml/tree.pxd b/src/lxml/tree.pxd new file mode 100644 index 00000000..627c7fd1 --- /dev/null +++ b/src/lxml/tree.pxd @@ -0,0 +1,101 @@ +cdef extern from "libxml/tree.h": + + ctypedef enum xmlElementType: + XML_ELEMENT_NODE= 1 + XML_ATTRIBUTE_NODE= 2 + XML_TEXT_NODE= 3 + XML_CDATA_SECTION_NODE= 4 + XML_ENTITY_REF_NODE= 5 + XML_ENTITY_NODE= 6 + XML_PI_NODE= 7 + XML_COMMENT_NODE= 8 + XML_DOCUMENT_NODE= 9 + XML_DOCUMENT_TYPE_NODE= 10 + XML_DOCUMENT_FRAG_NODE= 11 + XML_NOTATION_NODE= 12 + XML_HTML_DOCUMENT_NODE= 13 + XML_DTD_NODE= 14 + XML_ELEMENT_DECL= 15 + XML_ATTRIBUTE_DECL= 16 + XML_ENTITY_DECL= 17 + XML_NAMESPACE_DECL= 18 + XML_XINCLUDE_START= 19 + XML_XINCLUDE_END= 20 + + ctypedef struct xmlDoc + ctypedef struct xmlAttr + + ctypedef struct xmlNs: + char* href + char* prefix + + ctypedef struct xmlNode: + xmlElementType type + char *name + xmlNode *children + xmlNode *last + xmlNode *parent + xmlNode *next + xmlNode *prev + xmlDoc *doc + char *content + xmlAttr* properties + xmlNs* ns + + ctypedef struct xmlDoc: + xmlElementType type + char *name + xmlNode *children + xmlNode *last + xmlNode *parent + xmlNode *next + xmlNode *prev + xmlDoc *doc + + ctypedef struct xmlAttr: + xmlElementType type + char* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlNs* ns + + ctypedef struct xmlElement: + xmlElementType type + char* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + + cdef void xmlFreeDoc(xmlDoc *cur) + cdef void xmlFreeNode(xmlNode* cur) + cdef void xmlFree(char* buf) + + cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) + cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) + cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns, + char* name, char* content) + cdef xmlDoc* xmlNewDoc(char* version) + cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value) + cdef char* xmlGetNoNsProp(xmlNode* node, char* name) + cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) + cdef void xmlSetProp(xmlNode* node, char* name, char* value) + cdef void xmlDocDumpMemory(xmlDoc* cur, + char** mem, + int* size) + cdef void xmlUnlinkNode(xmlNode* cur) + cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) + cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) + cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) + cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) + cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) + cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) + cdef char* xmlNodeGetContent(xmlNode* cur) + cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace) + cdef int xmlIsBlankNode(xmlNode* node) |
