summaryrefslogtreecommitdiff
path: root/sandbox/odf2docutils/odf2docutilslib/python.py
diff options
context:
space:
mode:
Diffstat (limited to 'sandbox/odf2docutils/odf2docutilslib/python.py')
-rw-r--r--sandbox/odf2docutils/odf2docutilslib/python.py325
1 files changed, 325 insertions, 0 deletions
diff --git a/sandbox/odf2docutils/odf2docutilslib/python.py b/sandbox/odf2docutils/odf2docutilslib/python.py
new file mode 100644
index 000000000..76ca1a7ff
--- /dev/null
+++ b/sandbox/odf2docutils/odf2docutilslib/python.py
@@ -0,0 +1,325 @@
+# Copyright (C) 2013 Stefan Merten
+
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 2 of the License,
+# or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+"""
+Do conversion by using Python.
+"""
+
+__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc
+
+###############################################################################
+###############################################################################
+# Import
+
+import os.path
+
+import docutils.parsers, docutils.writers, docutils.nodes
+
+from lxml import etree
+
+###############################################################################
+###############################################################################
+# Constants
+
+###############################################################################
+###############################################################################
+# Variables
+
+###############################################################################
+###############################################################################
+# Functions
+
+def elem2PrefixName(elem):
+ """
+ :Parameters:
+
+ elem : etree._Element
+ The root element of the part of the input document to be traversed.
+
+ :rtype: ( unicode | None, unicode )
+ :return: Return namespace prefix and localname of `elem`. Namespace
+ prefix may be ``None`` if no or unknown namespace.
+ """
+ qName = etree.QName(elem)
+ prefix = None
+ # elem.prefix would also work for lxml but using the namespace is saver
+ if qName.namespace:
+ prefix = Parser.ns2Prefix.get(qName.namespace, None)
+ return ( prefix, qName.localname )
+
+###############################################################################
+###############################################################################
+# Classes
+
+class Parser(docutils.parsers.Parser):
+ """
+ Parse the input file and translate it to the output file by Python.
+ """
+
+ ns2Prefix = {
+ "http://www.w3.org/2001/xml-events": u'dom',
+ "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0": u'draw',
+ "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0": u'fo',
+ "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0": u'presentation',
+ "urn:oasis:names:tc:opendocument:xmlns:script:1.0": u'script',
+ "urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0": u'smil',
+ "urn:oasis:names:tc:opendocument:xmlns:style:1.0": u'style',
+ "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0": u'svg',
+ "urn:oasis:names:tc:opendocument:xmlns:table:1.0": u'table',
+ "urn:oasis:names:tc:opendocument:xmlns:text:1.0": u'text',
+ "http://www.w3.org/1999/xlink": u'xlink',
+ "urn:oasis:names:tc:opendocument:xmlns:office:1.0": u'office',
+ }
+ """Map namespace tag name to namespace URI."""
+
+ def parse(self, inputstring, document):
+ [ etree.register_namespace(prefix, uri)
+ for ( uri, prefix ) in self.ns2Prefix.items() ]
+ inDoc = etree.fromstring(inputstring)
+ self.walk(inDoc, OdfVisitor(document))
+
+ def walk(self, elem, visitor):
+ """
+ Traverse the element tree and create the result.
+
+ :Parameters:
+
+ elem : etree._Element
+ The root element of the part of the input document to be traversed.
+
+ visitor : OdfVisitor
+ The visitor to use.
+
+ :return: ``True`` if the traversal should be stopped.
+ :rtype: bool
+ """
+ stop = False
+ skipDeparture = False
+ someChildren = None
+ try:
+ try:
+ visitor.visit(elem)
+ except docutils.nodes.SkipNode:
+ return stop
+ except docutils.nodes.SkipDeparture:
+ skipDeparture = True
+ except SomeChildren, e:
+ someChildren = [ ( unicode(prefix), unicode(tag) )
+ for ( prefix, tag ) in e.tags ]
+ try:
+ for child in elem:
+ if (someChildren is not None
+ and elem2PrefixName(child) not in someChildren):
+ continue
+ if self.walk(child, visitor):
+ stop = True
+ break
+ except docutils.nodes.SkipSiblings:
+ pass
+ except docutils.nodes.SkipChildren:
+ pass
+ except docutils.nodes.StopTraversal:
+ stop = True
+ if not skipDeparture:
+ visitor.depart(elem)
+ return stop
+
+###############################################################################
+
+class SomeChildren(Exception):
+ """
+ Thrown to process only some children of the given node.
+ """
+
+ def __init__(self, tags):
+ self.tags = tags
+ """
+ :type: ( ( str, str ), ... )
+
+ List of children tags to process. A tag is given as a pair of namespace
+ prefix and local name.
+ """
+
+###############################################################################
+
+class OdfVisitor(object):
+ """
+ Visitor class for visiting an ODF element tree.
+
+ See `event_prefix_tag` for a description of the methods in this visitor.
+ """
+
+ def event_prefix_tag(self, elem):
+ """
+ Dummy method for documentation.
+
+ Vistor methods are named *event*\_\ *prefix*\_\ *tag* where *event* is
+ either ``visit`` or ``depart``, *prefix* is the namespace prefix and
+ *tag* is the local name of the tag with hyphens removed.
+
+ :Parameters:
+
+ elem : etree._Element
+ The element to process.
+
+ :except docutils.nodes.TreePruningException:
+ Thrown to control the behavior of the caller.
+
+ :except SomeChildren:
+ Thrown to limit the children processed.
+
+ :rtype: void
+
+ """
+ raise NotImplementedError("'event_prefix_tag' is just for documentation")
+
+ def __init__(self, document):
+ self.document = document
+ """
+ :type: docutils.nodes.document
+
+ The target document.
+ """
+ self.stack = [ document ]
+ """
+ :type: [ docutils.nodes.Node, ... ]
+
+ The stack of current nodes. The last one is the most recent current
+ node.
+ """
+
+ def applyMethod(self, elem, event):
+ """
+ Find and apply method.
+
+ :Parameters:
+
+ elem : lxml.etree._Element
+ The element to apply the method to.
+
+ event : str
+ The event to apply.
+ """
+ ( prefix, name ) = elem2PrefixName(elem)
+ name = name.replace("-", "")
+ methodName = "_".join(( event, prefix, name ))
+ method = getattr(self, methodName, self.visitDefault)
+ return method(elem)
+
+ def visit(self, elem):
+ """
+ Visit an element.
+
+ :Parameters:
+
+ elem : lxml.etree._Element
+ The element to visit.
+ """
+ self.applyMethod(elem, 'visit')
+
+ def depart(self, elem):
+ """
+ Depart an element.
+
+ :Parameters:
+
+ elem : lxml.etree._Element
+ The element to depart.
+ """
+ self.applyMethod(elem, 'depart')
+
+ def visitDefault(self, elem):
+ """
+ Used for elements without specific visit method.
+ """
+ pass
+
+ def departDefault(self, elem):
+ """
+ Used for elements without specific depart method.
+ """
+ pass
+
+ def push(self, node):
+ """
+ Add a node to the current node and make the node the current node.
+
+ :Parameters:
+
+ node : docutils.nodes.Node
+ The new node to be made current.
+
+ :return: The former current node which is now the parent.
+ :rtype: docutils.nodes.Node
+ """
+ parent = self.stack[-1]
+ assert isinstance(parent, docutils.nodes.Element), \
+ "Can not push to '%s'" % ( type(parent), )
+ parent.append(node)
+ self.stack.append(node)
+ return parent
+
+ def pop(self):
+ """
+ Remove current node and make its parent the new current node.
+
+ :return: The former current node which is now a child of the current
+ node.
+ :rtype: docutils.nodes.Node
+ """
+ return self.stack.pop()
+
+ ###########################################################################
+ # Presentation
+
+ def visit_draw_page(self, elem):
+ self.push(docutils.nodes.section())
+
+ def depart_draw_page(self, elem):
+ self.pop()
+
+ ###########################################################################
+ # Lists
+
+ def visit_text_list(self, elem):
+ self.push(docutils.nodes.bullet_list(bullet="*"))
+ raise SomeChildren(( ( "text", "list-item" ), ))
+
+ def depart_text_list(self, elem):
+ self.pop()
+
+ def visit_text_listitem(self, elem):
+ self.push(docutils.nodes.list_item())
+ raise SomeChildren(( ( "text", "list" ), ( "text", "p" ), ))
+
+ def depart_text_listitem(self, elem):
+ self.pop()
+
+ ###########################################################################
+ # Text
+
+ def visit_text_p(self, elem):
+ para = docutils.nodes.paragraph()
+ if elem.text is not None:
+ para.append(docutils.nodes.Text(elem.text))
+ self.push(para)
+ # TODO tails of embedded inline elements must be considered
+
+ def depart_text_p(self, elem):
+ self.pop()
+
+ # TODO Implement