diff options
| author | smerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2013-11-03 11:38:09 +0000 |
|---|---|---|
| committer | smerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2013-11-03 11:38:09 +0000 |
| commit | 2f58c894e72941337daf7f2deb0e82f7e5d59dfc (patch) | |
| tree | cb65e094034225cc68cbd3eb611ee3e6ae379470 /sandbox/odf2docutils/odf2docutilslib/python.py | |
| parent | 15ffc12c1ac841a82e63078f220f4adb6838b407 (diff) | |
| download | docutils-2f58c894e72941337daf7f2deb0e82f7e5d59dfc.tar.gz | |
Complete refactoring:
* Standard docutils infrastructure is used
* There is a Python based approach besides the XSLT based approach
* Both the XSLT-based and the Python-based approach are implemented as a
standard Docutils reader / parser / writer
The Python implementation has some failing tests because it is
not yet as mature as the XSLT implementation.
XSLT implementation fails for one test which is beyond the
capabilities of XSLT.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7724 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'sandbox/odf2docutils/odf2docutilslib/python.py')
| -rw-r--r-- | sandbox/odf2docutils/odf2docutilslib/python.py | 325 |
1 files changed, 325 insertions, 0 deletions
# Copyright (C) 2013 Stefan Merten

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.

"""
Do conversion by using Python.
"""

__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc

###############################################################################
###############################################################################
# Import

import os.path

import docutils.parsers
import docutils.writers
import docutils.nodes

from lxml import etree

###############################################################################
###############################################################################
# Constants

###############################################################################
###############################################################################
# Variables

###############################################################################
###############################################################################
# Functions

def elem2PrefixName(elem):
    """
    Split the qualified name of an element into prefix and local name.

    The prefix is looked up in `Parser.ns2Prefix` by namespace URI, so it is
    independent of the prefix actually used in the input document.

    :Parameters:

      elem : etree._Element
        The element to inspect.

    :rtype: ( unicode | None, unicode )
    :return: Return namespace prefix and localname of `elem`. Namespace
             prefix may be ``None`` if no or unknown namespace.
    """
    qName = etree.QName(elem)
    prefix = None
    # elem.prefix would also work for lxml but using the namespace is safer
    if qName.namespace:
        prefix = Parser.ns2Prefix.get(qName.namespace)
    return ( prefix, qName.localname )

###############################################################################
###############################################################################
# Classes

class Parser(docutils.parsers.Parser):
    """
    Parse the input file and translate it to the output file by Python.
    """

    ns2Prefix = {
        "http://www.w3.org/2001/xml-events": u'dom',
        "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0": u'draw',
        "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0": u'fo',
        "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0": u'presentation',
        "urn:oasis:names:tc:opendocument:xmlns:script:1.0": u'script',
        "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0": u'smil',
        "urn:oasis:names:tc:opendocument:xmlns:style:1.0": u'style',
        "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0": u'svg',
        "urn:oasis:names:tc:opendocument:xmlns:table:1.0": u'table',
        "urn:oasis:names:tc:opendocument:xmlns:text:1.0": u'text',
        "http://www.w3.org/1999/xlink": u'xlink',
        "urn:oasis:names:tc:opendocument:xmlns:office:1.0": u'office',
        }
    """Map namespace URI to the namespace prefix used by this module."""

    def parse(self, inputstring, document):
        """
        Parse `inputstring` as XML and fill `document` from it.

        :Parameters:

          inputstring : str | unicode
            The XML input; handed unchanged to ``etree.fromstring``.

          document : docutils.nodes.document
            The target document handed to the `OdfVisitor`.
        """
        # A plain loop instead of a comprehension: only the side effect of
        # registering the prefixes is wanted, not a list of ``None``
        for ( uri, prefix ) in self.ns2Prefix.items():
            etree.register_namespace(prefix, uri)
        inDoc = etree.fromstring(inputstring)
        self.walk(inDoc, OdfVisitor(document))

    def walk(self, elem, visitor):
        """
        Traverse the element tree and create the result.

        The control flow follows the tree pruning exceptions of
        `docutils.nodes`; in addition `SomeChildren` raised by the visit
        method restricts the children which are traversed.

        :Parameters:

          elem : etree._Element
            The root element of the part of the input document to be traversed.

          visitor : OdfVisitor
            The visitor to use.

        :return: ``True`` if the traversal should be stopped.
        :rtype: bool
        """
        stop = False
        skipDeparture = False
        someChildren = None
        try:
            try:
                visitor.visit(elem)
            except docutils.nodes.SkipNode:
                return stop
            except docutils.nodes.SkipDeparture:
                skipDeparture = True
            except SomeChildren as e:
                # Normalize to tuples so they compare equal to the result of
                # `elem2PrefixName`
                someChildren = [ ( prefix, tag )
                                 for ( prefix, tag ) in e.tags ]
            try:
                for child in elem:
                    if callable(child.tag):
                        # Comments, processing instructions and entities
                        # have a factory function as tag and no qualified
                        # name; they carry no content to be converted
                        continue
                    if (someChildren is not None
                        and elem2PrefixName(child) not in someChildren):
                        continue
                    if self.walk(child, visitor):
                        stop = True
                        break
            except docutils.nodes.SkipSiblings:
                pass
        except docutils.nodes.SkipChildren:
            pass
        except docutils.nodes.StopTraversal:
            stop = True
        if not skipDeparture:
            visitor.depart(elem)
        return stop

###############################################################################

class SomeChildren(Exception):
    """
    Thrown to process only some children of the given node.
    """

    def __init__(self, tags):
        super(SomeChildren, self).__init__(tags)
        self.tags = tags
        """
        :type: ( ( str, str ), ... )

        List of children tags to process. A tag is given as a pair of namespace
        prefix and local name.
        """

###############################################################################

class OdfVisitor(object):
    """
    Visitor class for visiting an ODF element tree.

    See `event_prefix_tag` for a description of the methods in this visitor.
    """

    def event_prefix_tag(self, elem):
        # Raw docstring because of the backslashes used for reST escaping
        r"""
        Dummy method for documentation.

        Visitor methods are named *event*\_\ *prefix*\_\ *tag* where *event* is
        either ``visit`` or ``depart``, *prefix* is the namespace prefix and
        *tag* is the local name of the tag with hyphens removed.

        :Parameters:

          elem : etree._Element
            The element to process.

        :except docutils.nodes.TreePruningException:
          Thrown to control the behavior of the caller.

        :except SomeChildren:
          Thrown to limit the children processed.

        :rtype: void

        """
        raise NotImplementedError("'event_prefix_tag' is just for documentation")

    def __init__(self, document):
        self.document = document
        """
        :type: docutils.nodes.document

        The target document.
        """
        self.stack = [ document ]
        """
        :type: [ docutils.nodes.Node, ... ]

        The stack of current nodes. The last one is the most recent current
        node.
        """

    def applyMethod(self, elem, event):
        """
        Find and apply method.

        The method looked up is named as described in `event_prefix_tag`. If
        there is no such method, or if the element has no or an unknown
        namespace, the default method for the event is used.

        :Parameters:

          elem : lxml.etree._Element
            The element to apply the method to.

          event : str
            The event to apply.

        :return: Whatever the method applied returns.
        """
        ( prefix, name ) = elem2PrefixName(elem)
        # ``visitDefault`` / ``departDefault`` depending on the event
        default = getattr(self, event + "Default", self.visitDefault)
        if prefix is None:
            # No method name can be built without a prefix
            return default(elem)
        methodName = "_".join(( event, prefix, name.replace("-", "") ))
        method = getattr(self, methodName, default)
        return method(elem)

    def visit(self, elem):
        """
        Visit an element.

        :Parameters:

          elem : lxml.etree._Element
            The element to visit.
        """
        self.applyMethod(elem, 'visit')

    def depart(self, elem):
        """
        Depart an element.

        :Parameters:

          elem : lxml.etree._Element
            The element to depart.
        """
        self.applyMethod(elem, 'depart')

    def visitDefault(self, elem):
        """
        Used for elements without specific visit method.
        """
        pass

    def departDefault(self, elem):
        """
        Used for elements without specific depart method.
        """
        pass

    def push(self, node):
        """
        Add a node to the current node and make the node the current node.

        :Parameters:

          node : docutils.nodes.Node
            The new node to be made current.

        :return: The former current node which is now the parent.
        :rtype: docutils.nodes.Node
        """
        parent = self.stack[-1]
        assert isinstance(parent, docutils.nodes.Element), \
            "Can not push to '%s'" % ( type(parent), )
        parent.append(node)
        self.stack.append(node)
        return parent

    def pop(self):
        """
        Remove current node and make its parent the new current node.

        :return: The former current node which is now a child of the current
                 node.
        :rtype: docutils.nodes.Node
        """
        return self.stack.pop()

    ###########################################################################
    # Presentation

    def visit_draw_page(self, elem):
        self.push(docutils.nodes.section())

    def depart_draw_page(self, elem):
        self.pop()

    ###########################################################################
    # Lists

    def visit_text_list(self, elem):
        self.push(docutils.nodes.bullet_list(bullet="*"))
        raise SomeChildren(( ( "text", "list-item" ), ))

    def depart_text_list(self, elem):
        self.pop()

    def visit_text_listitem(self, elem):
        self.push(docutils.nodes.list_item())
        raise SomeChildren(( ( "text", "list" ), ( "text", "p" ), ))

    def depart_text_listitem(self, elem):
        self.pop()

    ###########################################################################
    # Text

    def visit_text_p(self, elem):
        para = docutils.nodes.paragraph()
        if elem.text is not None:
            para.append(docutils.nodes.Text(elem.text))
        self.push(para)
        # TODO tails of embedded inline elements must be considered

    def depart_text_p(self, elem):
        self.pop()

    # TODO Implement
