summaryrefslogtreecommitdiff
path: root/docutils/readers/python/moduleparser.py
diff options
context:
space:
mode:
authorgoodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2002-12-05 02:26:03 +0000
committergoodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2002-12-05 02:26:03 +0000
commit9635f4c1c9ba56761da76c853eaa53bde44655ca (patch)
tree0466e300b88b671455b0d82c1d6a04ab5077c830 /docutils/readers/python/moduleparser.py
parenta692660f038f4d089c877cc1d98d1654d72d0f3a (diff)
downloaddocutils-9635f4c1c9ba56761da76c853eaa53bde44655ca.tar.gz
Parser for Python modules
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@990 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/readers/python/moduleparser.py')
-rw-r--r--docutils/readers/python/moduleparser.py244
1 files changed, 244 insertions, 0 deletions
diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
new file mode 100644
index 000000000..a4ef25298
--- /dev/null
+++ b/docutils/readers/python/moduleparser.py
@@ -0,0 +1,244 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Parser for Python modules.
+
+Ideas:
+
+* Tokenize the module in parallel to extract initial values, comments, etc.
+
+* Merge the compiler & tokenize output such that the raw text hangs off of
+ nodes? Especially assignment expressions (RHS).
+
+"""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+import compiler
+import compiler.ast
+import compiler.visitor
+from compiler.consts import OP_ASSIGN
+from types import StringType, UnicodeType
+
+
+def parse_module(module_text, filename):
+ ast = compiler.parse(module_text)
+ visitor = ModuleVisitor(filename)
+ compiler.walk(ast, visitor, walker=visitor)
+ return visitor.module
+
+
+class ModuleVisitor(compiler.visitor.ASTVisitor):
+
+ def __init__(self, filename):
+ compiler.visitor.ASTVisitor.__init__(self)
+ self.filename = filename
+ self.module = None
+ self.context = []
+ self.documentable = None
+
+ def default(self, node, *args):
+ self.documentable = None
+ #print 'in default (%s)' % node.__class__.__name__
+ #compiler.visitor.ASTVisitor.default(self, node, *args)
+
+ def default_ignore(self, node, *args):
+ #print 'in default_ignore (%s)' % node.__class__.__name__
+ compiler.visitor.ASTVisitor.default(self, node, *args)
+
+ def visitModule(self, node):
+ #print dir(node)
+ self.module = module = Module(node, self.filename)
+ if node.doc is not None:
+ module.append(Docstring(node, node.doc))
+ self.context.append(module)
+ self.documentable = module
+ self.visit(node.node)
+ self.context.pop()
+
+ def visitStmt(self, node):
+ self.default_ignore(node)
+
+ def visitDiscard(self, node):
+ if self.documentable:
+ self.visit(node.expr)
+
+ def visitConst(self, node):
+ if self.documentable:
+ if type(node.value) in (StringType, UnicodeType):
+ self.documentable.append(Docstring(node, node.value))
+ else:
+ self.documentable = None
+
+ def visitImport(self, node):
+ self.context[-1].append(Import(node, node.names))
+ self.documentable = None
+
+ def visitFrom(self, node):
+ self.context[-1].append(
+ Import(node, node.names, from_name=node.modname))
+ self.documentable = None
+
+ def visitAssign(self, node):
+ visitor = AssignmentVisitor()
+ compiler.walk(node, visitor, walker=visitor)
+ if visitor.attributes:
+ self.context[-1].extend(visitor.attributes)
+ if len(visitor.attributes) == 1:
+ self.documentable = visitor.attributes[0]
+ else:
+ self.documentable = None
+
+
+class AssignmentVisitor(compiler.visitor.ASTVisitor):
+
+ def __init__(self):
+ compiler.visitor.ASTVisitor.__init__(self)
+ self.attributes = []
+
+ def default(self, node, *args):
+ pass
+
+ def visitAssign(self, node):
+ compiler.visitor.ASTVisitor.default(self, node)
+
+ def visitAssName(self, node):
+ self.attributes.append(Attribute(node, node.name))
+
+ def get_rhs(self, node):
+ return "'TBD'"
+
+
+class Node: # (compiler.ast.Node)
+
+ def __init__(self, node):
+ self.children = []
+ """List of child nodes."""
+
+ self.lineno = node.lineno
+ """Line number of this node (or ``None``)."""
+
+ def __str__(self, indent=' ', level=0):
+ return ''.join(['%s%s\n' % (indent * level, repr(self))] +
+ [child.__str__(indent, level+1)
+ for child in self.children])
+
+ def __repr__(self):
+ parts = [self.__class__.__name__]
+ for name, value in self.attlist():
+ parts.append('%s="%s"' % (name, value))
+ return '<%s>' % ' '.join(parts)
+
+ def attlist(self, **atts):
+ if self.lineno is not None:
+ atts['lineno'] = self.lineno
+ attlist = atts.items()
+ attlist.sort()
+ return attlist
+
+ def append(self, node):
+ self.children.append(node)
+
+ def extend(self, node_list):
+ self.children.extend(node_list)
+
+
+class Module(Node):
+
+ def __init__(self, node, filename):
+ Node.__init__(self, node)
+ self.filename = filename
+
+ def attlist(self):
+ return Node.attlist(self, filename=self.filename)
+
+
+class Docstring(Node):
+
+ def __init__(self, node, text):
+ Node.__init__(self, node)
+ self.text = trim_docstring(text)
+
+ def __str__(self, indent=' ', level=0):
+ prefix = indent * (level + 1)
+ text = '\n'.join([prefix + line for line in self.text.splitlines()])
+ return Node.__str__(self, indent, level) + text + '\n'
+
+
+class Import(Node):
+
+ def __init__(self, node, names, from_name=None):
+ Node.__init__(self, node)
+ self.names = names
+ self.from_name = from_name
+
+ def __str__(self, indent=' ', level=0):
+ prefix = indent * (level + 1)
+ lines = []
+ for name, as in self.names:
+ if as:
+ lines.append('%s%s as %s' % (prefix, name, as))
+ else:
+ lines.append('%s%s' % (prefix, name))
+ text = '\n'.join(lines)
+ return Node.__str__(self, indent, level) + text + '\n'
+
+ def attlist(self):
+ if self.from_name:
+ atts = {'from': self.from_name}
+ else:
+ atts = {}
+ return Node.attlist(self, **atts)
+
+
+class Attribute(Node):
+
+ def __init__(self, node, name):
+ Node.__init__(self, node)
+ self.name = name
+
+ def attlist(self):
+ return Node.attlist(self, name=self.name)
+
+
+class Expression(Node):
+
+ def __init__(self, node, text):
+ Node.__init__(self, node)
+ self.text = text
+
+
+def trim_docstring(text):
+ """
+ Trim indentation and blank lines from docstring text & return it.
+
+ See PEP 257.
+ """
+ if not text:
+ return ''
+ # Convert tabs to spaces (following the normal Python rules)
+ # and split into a list of lines:
+ lines = text.expandtabs().splitlines()
+ # Determine minimum indentation (first line doesn't count):
+ indent = sys.maxint
+ for line in lines[1:]:
+ stripped = line.lstrip()
+ if stripped:
+ indent = min(indent, len(line) - len(stripped))
+ # Remove indentation (first line is special):
+ trimmed = [lines[0].strip()]
+ if indent < sys.maxint:
+ for line in lines[1:]:
+ trimmed.append(line[indent:].rstrip())
+ # Strip off trailing and leading blank lines:
+ while trimmed and not trimmed[-1]:
+ trimmed.pop()
+ while trimmed and not trimmed[0]:
+ trimmed.pop(0)
+ # Return a single string:
+ return '\n'.join(trimmed)