From a692660f038f4d089c877cc1d98d1654d72d0f3a Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 5 Dec 2002 02:25:35 +0000 Subject: Python Source Reader git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@989 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 docutils/readers/python/__init__.py (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py new file mode 100644 index 000000000..a346ce32a --- /dev/null +++ b/docutils/readers/python/__init__.py @@ -0,0 +1,19 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +This package contains the Python Source Reader modules. +""" + +__docformat__ = 'reStructuredText' + + +import sys +import docutils.readers + + +class Reader(docutils.readers.Reader): + pass -- cgit v1.2.1 From 9635f4c1c9ba56761da76c853eaa53bde44655ca Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 5 Dec 2002 02:26:03 +0000 Subject: Parser for Python modules git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@990 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 244 ++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 docutils/readers/python/moduleparser.py (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py new file mode 100644 index 000000000..a4ef25298 --- /dev/null +++ b/docutils/readers/python/moduleparser.py @@ -0,0 +1,244 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Parser for Python modules. + +Ideas: + +* Tokenize the module in parallel to extract initial values, comments, etc. + +* Merge the compiler & tokenize output such that the raw text hangs off of + nodes? Especially assignment expressions (RHS). + +""" + +__docformat__ = 'reStructuredText' + +import sys +import compiler +import compiler.ast +import compiler.visitor +from compiler.consts import OP_ASSIGN +from types import StringType, UnicodeType + + +def parse_module(module_text, filename): + ast = compiler.parse(module_text) + visitor = ModuleVisitor(filename) + compiler.walk(ast, visitor, walker=visitor) + return visitor.module + + +class ModuleVisitor(compiler.visitor.ASTVisitor): + + def __init__(self, filename): + compiler.visitor.ASTVisitor.__init__(self) + self.filename = filename + self.module = None + self.context = [] + self.documentable = None + + def default(self, node, *args): + self.documentable = None + #print 'in default (%s)' % node.__class__.__name__ + #compiler.visitor.ASTVisitor.default(self, node, *args) + + def default_ignore(self, node, *args): + #print 'in default_ignore (%s)' % node.__class__.__name__ + compiler.visitor.ASTVisitor.default(self, node, *args) + + def visitModule(self, node): + #print dir(node) + self.module = module = Module(node, self.filename) + if node.doc is not None: + module.append(Docstring(node, node.doc)) + self.context.append(module) + self.documentable = module + self.visit(node.node) + self.context.pop() + + def visitStmt(self, node): + self.default_ignore(node) + + def visitDiscard(self, node): + if self.documentable: + self.visit(node.expr) + + def visitConst(self, node): + if self.documentable: + if type(node.value) in (StringType, UnicodeType): + self.documentable.append(Docstring(node, node.value)) + else: + self.documentable = None + + def visitImport(self, node): + self.context[-1].append(Import(node, node.names)) + self.documentable = None + + def visitFrom(self, node): + self.context[-1].append( + Import(node, node.names, from_name=node.modname)) + self.documentable = None + + def visitAssign(self, node): + visitor = AssignmentVisitor() + compiler.walk(node, visitor, walker=visitor) + if visitor.attributes: + self.context[-1].extend(visitor.attributes) + if len(visitor.attributes) == 1: + self.documentable = visitor.attributes[0] + else: + self.documentable = None + + +class AssignmentVisitor(compiler.visitor.ASTVisitor): + + def __init__(self): + compiler.visitor.ASTVisitor.__init__(self) + self.attributes = [] + + def default(self, node, *args): + pass + + def visitAssign(self, node): + compiler.visitor.ASTVisitor.default(self, node) + + def visitAssName(self, node): + self.attributes.append(Attribute(node, node.name)) + + def get_rhs(self, node): + return "'TBD'" + + +class Node: # (compiler.ast.Node) + + def __init__(self, node): + self.children = [] + """List of child nodes.""" + + self.lineno = node.lineno + """Line number of this node (or ``None``).""" + + def __str__(self, indent=' ', level=0): + return ''.join(['%s%s\n' % (indent * level, repr(self))] + + [child.__str__(indent, level+1) + for child in self.children]) + + def __repr__(self): + parts = [self.__class__.__name__] + for name, value in self.attlist(): + parts.append('%s="%s"' % (name, value)) + return '<%s>' % ' '.join(parts) + + def attlist(self, **atts): + if self.lineno is not None: + atts['lineno'] = self.lineno + attlist = atts.items() + attlist.sort() + return attlist + + def append(self, node): + self.children.append(node) + + def extend(self, node_list): + self.children.extend(node_list) + + +class Module(Node): + + def __init__(self, node, filename): + Node.__init__(self, node) + self.filename = filename + + def attlist(self): + return Node.attlist(self, filename=self.filename) + + +class Docstring(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = trim_docstring(text) + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + text = '\n'.join([prefix + line for line in self.text.splitlines()]) + return Node.__str__(self, indent, level) + text + '\n' + + +class Import(Node): + + def __init__(self, node, names, from_name=None): + Node.__init__(self, node) + self.names = names + self.from_name = from_name + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + lines = [] + for name, as in self.names: + if as: + lines.append('%s%s as %s' % (prefix, name, as)) + else: + lines.append('%s%s' % (prefix, name)) + text = '\n'.join(lines) + return Node.__str__(self, indent, level) + text + '\n' + + def attlist(self): + if self.from_name: + atts = {'from': self.from_name} + else: + atts = {} + return Node.attlist(self, **atts) + + +class Attribute(Node): + + def __init__(self, node, name): + Node.__init__(self, node) + self.name = name + + def attlist(self): + return Node.attlist(self, name=self.name) + + +class Expression(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = text + + +def trim_docstring(text): + """ + Trim indentation and blank lines from docstring text & return it. + + See PEP 257. + """ + if not text: + return '' + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = text.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + indent = sys.maxint + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxint: + for line in lines[1:]: + trimmed.append(line[indent:].rstrip()) + # Strip off trailing and leading blank lines: + while trimmed and not trimmed[-1]: + trimmed.pop() + while trimmed and not trimmed[0]: + trimmed.pop(0) + # Return a single string: + return '\n'.join(trimmed) -- cgit v1.2.1 From 241b81278dfcb2caddefea8ce1ed648a48f720d4 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 7 Dec 2002 03:13:24 +0000 Subject: update git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1006 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index a4ef25298..5aab372b2 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -114,7 +114,7 @@ class AssignmentVisitor(compiler.visitor.ASTVisitor): return "'TBD'" -class Node: # (compiler.ast.Node) +class Node: def __init__(self, node): self.children = [] -- cgit v1.2.1 From b75e46e8a06dd8170c9fbabaee4c4aaa22050e67 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 12 Dec 2002 03:26:55 +0000 Subject: Updated. Dead-end with AssignmentVisitor reconstructing expressions. TokenReader seems to be the way to go. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1017 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 250 ++++++++++++++++++++++++++++++-- 1 file changed, 238 insertions(+), 12 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 5aab372b2..9ab3eea79 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -14,6 +14,150 @@ Ideas: * Merge the compiler & tokenize output such that the raw text hangs off of nodes? Especially assignment expressions (RHS). +What I'd like to do is to take a module, read in the text, run it through the +module parser (using compiler.py and tokenize.py) and produce a high-level AST +full of nodes that are interesting from an auto-documentation standpoint. For +example, given this module (x.py):: + + # comment + + '''Docstring''' + + '''Additional docstring''' + + __docformat__ = 'reStructuredText' + + a = 1 + '''Attribute docstring''' + + class C(Super): + + '''C's docstring''' + + class_attribute = 1 + '''class_attribute's docstring''' + + def __init__(self, text=None): + '''__init__'s docstring''' + + self.instance_attribute = (text * 7 + + ' whaddyaknow') + '''instance_attribute's docstring''' + + + def f(x, # parameter x + y=a*5, # parameter y + *args): # parameter args + '''f's docstring''' + return [x + item for item in args] + + f.function_attribute = 1 + '''f.function_attribute's docstring''' + +The module parser should produce a high-level AST, something like this:: + + + + comment + + Docstring + (I'll leave out the lineno's) + Additional docstring + + + 'reStructuredText' + + + 1 + + Attribute docstring + + + C's docstring + + + 1 + + class_attribute's docstring + + + __init__'s docstring + + + (text * 7 + + ' whaddyaknow') + + class_attribute's docstring + + + + + # parameter x + + + a*5 + + # parameter y + + + # parameter args + + f's docstring + + + 1 + + f.function_attribute's docstring + +compiler.parse() provides most of what's needed for this AST. I think that +"tokenize" can be used to get the rest, and all that's left is to hunker down +and figure out how. We can determine the line number from the +compiler.parse() AST, and a get_rhs(lineno) method would provide the rest. + +The Docutils Python reader component will transform this AST into a +Python-specific doctree, and then a `stylist transform`_ would further +transform it into a generic doctree. Namespaces will have to be compiled for +each of the scopes, but I'm not certain at what stage of processing. + +It's very important to keep all docstring processing out of this, so that it's +a completely generic and not tool-specific. + +> Why perform all of those transformations? Why not go from the AST to a +> generic doctree? Or, even from the AST to the final output? + +I want the docutils.readers.python.moduleparser.parse_module() function to +produce a standard documentation-oriented AST that can be used by any tool. +We can develop it together without having to compromise on the rest of our +design (i.e., HappyDoc doesn't have to be made to work like Docutils, and +vice-versa). It would be a higher-level version of what compiler.py provides. + +The Python reader component transforms this generic AST into a Python-specific +doctree (it knows about modules, classes, functions, etc.), but this is +specific to Docutils and cannot be used by HappyDoc or others. The stylist +transform does the final layout, converting Python-specific structures +("class" sections, etc.) into a generic doctree using primitives (tables, +sections, lists, etc.). This generic doctree does *not* know about Python +structures any more. The advantage is that this doctree can be handed off to +any of the output writers to create any output format we like. + +The latter two transforms are separate because I want to be able to have +multiple independent layout styles (multiple runtime-selectable "stylist +transforms"). Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal, +etc.) has its own fixed format. I personally don't like the tables-based +format produced by these tools, and I'd like to be able to customize the +format easily. That's the goal of stylist transforms, which are independent +from the Reader component itself. One stylist transform could produce +HappyDoc-like output, another could produce output similar to module docs in +the Python library reference manual, and so on. + +It's for exactly this reason: + +>> It's very important to keep all docstring processing out of this, so that +>> it's a completely generic and not tool-specific. + +... but it goes past docstring processing. It's also important to keep style +decisions and tool-specific data transforms out of this module parser. + """ __docformat__ = 'reStructuredText' @@ -21,8 +165,10 @@ __docformat__ = 'reStructuredText' import sys import compiler import compiler.ast -import compiler.visitor +import tokenize +import token from compiler.consts import OP_ASSIGN +from compiler.visitor import ASTVisitor from types import StringType, UnicodeType @@ -33,10 +179,10 @@ def parse_module(module_text, filename): return visitor.module -class ModuleVisitor(compiler.visitor.ASTVisitor): +class ModuleVisitor(ASTVisitor): def __init__(self, filename): - compiler.visitor.ASTVisitor.__init__(self) + ASTVisitor.__init__(self) self.filename = filename self.module = None self.context = [] @@ -45,11 +191,11 @@ class ModuleVisitor(compiler.visitor.ASTVisitor): def default(self, node, *args): self.documentable = None #print 'in default (%s)' % node.__class__.__name__ - #compiler.visitor.ASTVisitor.default(self, node, *args) + #ASTVisitor.default(self, node, *args) def default_ignore(self, node, *args): #print 'in default_ignore (%s)' % node.__class__.__name__ - compiler.visitor.ASTVisitor.default(self, node, *args) + ASTVisitor.default(self, node, *args) def visitModule(self, node): #print dir(node) @@ -95,23 +241,66 @@ class ModuleVisitor(compiler.visitor.ASTVisitor): self.documentable = None -class AssignmentVisitor(compiler.visitor.ASTVisitor): +class AssignmentVisitor(ASTVisitor): + + """ + Tried reconstructing expressions (the RHS of assignments) by + visiting the compiler.parse() tree, but a lot of information is + missing, like parenthesis-grouping of expressions. + + Gotta do it by parsing tokens. + """ def __init__(self): - compiler.visitor.ASTVisitor.__init__(self) + ASTVisitor.__init__(self) self.attributes = [] + self.parts = [] def default(self, node, *args): - pass + print >>sys.stderr, '%s not visited!' % node.__class__.__name__ + ASTVisitor.default(self, node) def visitAssign(self, node): - compiler.visitor.ASTVisitor.default(self, node) + ASTVisitor.default(self, node) + self.attributes[-1].append(Expression(node, ''.join(self.parts))) def visitAssName(self, node): self.attributes.append(Attribute(node, node.name)) - def get_rhs(self, node): - return "'TBD'" + def visitAdd(self, node): + ASTVisitor.default(self, node) + self.parts[-2:] = ' + '.join(self.parts[-2:]) + + def visitAnd(self, node): + ASTVisitor.default(self, node) + self.parts.insert(len(self.parts) - 1, ' and ') + + def visitBackquote(self, node): + self.parts.append('`') + ASTVisitor.default(self, node) + self.parts.append('`') + + def visitBitand(self, node): + ASTVisitor.default(self, node) + self.parts.insert(len(self.parts) - 1, ' & ') + + def visitBitor(self, node): + ASTVisitor.default(self, node) + self.parts.insert(len(self.parts) - 1, ' | ') + + def visitBitxor(self, node): + ASTVisitor.default(self, node) + self.parts.insert(len(self.parts) - 1, ' ^ ') + + def visitConst(self, node): + self.parts.append(repr(node.value)) + + def visitConst(self, node): + self.parts.append(repr(node.value)) + + def visitInvert(self, node): + self.parts.append('~ ') + ASTVisitor.default(self, node) class Node: @@ -211,7 +400,44 @@ class Expression(Node): def __init__(self, node, text): Node.__init__(self, node) self.text = text - + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + return '%s%s%s\n' % (Node.__str__(self, indent, level), + prefix, self.text) + + +class TokenReader: + + def __init__(self, text): + self.text = text + self.lines = text.splitlines(1) + self.generator = tokenize.generate_tokens(iter(self.lines).next) + + def __iter__(self): + return self + + def next(self): + token = self.generator.next() + self.type, self.string, self.start, self.end, self.line = token + return token + + def goto_line(self, lineno): + for token in self: + if self.start[0] >= lineno: + return token + else: + raise IndexError + + def rhs(self, name, lineno): + self.goto_line(lineno) + while self.start[0] == lineno: + if self.type == token.OP and self.string == '=': + break + self.next() + else: + raise IndexError + def trim_docstring(text): """ -- cgit v1.2.1 From 47ff214eaec8f59ac08c78614811e12eb9b06fde Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 14 Dec 2002 01:38:31 +0000 Subject: making good progress git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1020 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 293 ++++++++++++++++++++++---------- 1 file changed, 199 insertions(+), 94 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 9ab3eea79..cbca876a7 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -12,7 +12,7 @@ Ideas: * Tokenize the module in parallel to extract initial values, comments, etc. * Merge the compiler & tokenize output such that the raw text hangs off of - nodes? Especially assignment expressions (RHS). + nodes. Useful for assignment expressions (RHS). What I'd like to do is to take a module, read in the text, run it through the module parser (using compiler.py and tokenize.py) and produce a high-level AST @@ -79,7 +79,12 @@ The module parser should produce a high-level AST, something like this:: 1 class_attribute's docstring - + + + + + + None __init__'s docstring @@ -109,10 +114,10 @@ The module parser should produce a high-level AST, something like this:: f.function_attribute's docstring -compiler.parse() provides most of what's needed for this AST. I think that -"tokenize" can be used to get the rest, and all that's left is to hunker down -and figure out how. We can determine the line number from the -compiler.parse() AST, and a get_rhs(lineno) method would provide the rest. +compiler.parse() provides most of what's needed for this AST, and "tokenize" +can be used to get the rest. We can determine the line number from the +compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the +rest. The Docutils Python reader component will transform this AST into a Python-specific doctree, and then a `stylist transform`_ would further @@ -174,17 +179,17 @@ from types import StringType, UnicodeType def parse_module(module_text, filename): ast = compiler.parse(module_text) - visitor = ModuleVisitor(filename) + token_parser = TokenParser(module_text) + visitor = ModuleVisitor(filename, token_parser) compiler.walk(ast, visitor, walker=visitor) return visitor.module -class ModuleVisitor(ASTVisitor): +class BaseVisitor(ASTVisitor): - def __init__(self, filename): + def __init__(self, token_parser): ASTVisitor.__init__(self) - self.filename = filename - self.module = None + self.token_parser = token_parser self.context = [] self.documentable = None @@ -193,22 +198,12 @@ class ModuleVisitor(ASTVisitor): #print 'in default (%s)' % node.__class__.__name__ #ASTVisitor.default(self, node, *args) - def default_ignore(self, node, *args): - #print 'in default_ignore (%s)' % node.__class__.__name__ + def default_visit(self, node, *args): + #print 'in default_visit (%s)' % node.__class__.__name__ ASTVisitor.default(self, node, *args) - def visitModule(self, node): - #print dir(node) - self.module = module = Module(node, self.filename) - if node.doc is not None: - module.append(Docstring(node, node.doc)) - self.context.append(module) - self.documentable = module - self.visit(node.node) - self.context.pop() - def visitStmt(self, node): - self.default_ignore(node) +class DocstringVisitor(BaseVisitor): def visitDiscard(self, node): if self.documentable: @@ -221,17 +216,14 @@ class ModuleVisitor(ASTVisitor): else: self.documentable = None - def visitImport(self, node): - self.context[-1].append(Import(node, node.names)) - self.documentable = None + def visitStmt(self, node): + self.default_visit(node) - def visitFrom(self, node): - self.context[-1].append( - Import(node, node.names, from_name=node.modname)) - self.documentable = None + +class AssignmentVisitor(DocstringVisitor): def visitAssign(self, node): - visitor = AssignmentVisitor() + visitor = AttributeVisitor(self.token_parser) compiler.walk(node, visitor, walker=visitor) if visitor.attributes: self.context[-1].extend(visitor.attributes) @@ -241,66 +233,111 @@ class ModuleVisitor(ASTVisitor): self.documentable = None -class AssignmentVisitor(ASTVisitor): +class ModuleVisitor(AssignmentVisitor): - """ - Tried reconstructing expressions (the RHS of assignments) by - visiting the compiler.parse() tree, but a lot of information is - missing, like parenthesis-grouping of expressions. + def __init__(self, filename, token_parser): + AssignmentVisitor.__init__(self, token_parser) + self.filename = filename + self.module = None - Gotta do it by parsing tokens. - """ + def visitModule(self, node): + self.module = module = Module(node, self.filename) + if node.doc is not None: + module.append(Docstring(node, node.doc)) + self.context.append(module) + self.documentable = module + self.visit(node.node) + self.context.pop() - def __init__(self): - ASTVisitor.__init__(self) - self.attributes = [] - self.parts = [] + def visitImport(self, node): + self.context[-1].append(Import(node, node.names)) + self.documentable = None - def default(self, node, *args): - print >>sys.stderr, '%s not visited!' % node.__class__.__name__ - ASTVisitor.default(self, node) + def visitFrom(self, node): + self.context[-1].append( + Import(node, node.names, from_name=node.modname)) + self.documentable = None + + def visitFunction(self, node): + visitor = FunctionVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.function) + + +class AttributeVisitor(BaseVisitor): + + def __init__(self, token_parser): + BaseVisitor.__init__(self, token_parser) + self.attributes = [] def visitAssign(self, node): - ASTVisitor.default(self, node) - self.attributes[-1].append(Expression(node, ''.join(self.parts))) + # Don't visit the expression itself, just the attribute nodes: + for child in node.nodes: + self.dispatch(child) + expression_text = self.token_parser.rhs(node.lineno) + expression = Expression(node, expression_text) + for attribute in self.attributes: + attribute.append(expression) def visitAssName(self, node): self.attributes.append(Attribute(node, node.name)) - def visitAdd(self, node): - ASTVisitor.default(self, node) - self.parts[-2:] = ' + '.join(self.parts[-2:]) - - def visitAnd(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' and ') + def visitAssTuple(self, node): + attributes = self.attributes + self.attributes = [] + self.default_visit(node) + names = [attribute.name for attribute in self.attributes] + att_tuple = AttributeTuple(node, names) + att_tuple.lineno = self.attributes[0].lineno + self.attributes = attributes + self.attributes.append(att_tuple) - def visitBackquote(self, node): - self.parts.append('`') - ASTVisitor.default(self, node) - self.parts.append('`') + def visitAssAttr(self, node): + self.default_visit(node, node.attrname) - def visitBitand(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' & ') + def visitGetattr(self, node, suffix): + self.default_visit(node, node.attrname + '.' + suffix) - def visitBitor(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' | ') + def visitName(self, node, suffix): + self.attributes.append(Attribute(node, node.name + '.' + suffix)) - def visitBitxor(self, node): - ASTVisitor.default(self, node) - self.parts.insert(len(self.parts) - 1, ' ^ ') - def visitConst(self, node): - self.parts.append(repr(node.value)) +class FunctionVisitor(DocstringVisitor): - def visitConst(self, node): - self.parts.append(repr(node.value)) + def visitFunction(self, node): + self.function = function = Function(node, node.name) + if node.doc is not None: + function.append(Docstring(node, node.doc)) + self.context.append(function) + self.documentable = function + self.parse_parameter_list(node) + self.visit(node.code) + self.context.pop() - def visitInvert(self, node): - self.parts.append('~ ') - ASTVisitor.default(self, node) + def parse_parameter_list(self, node): + parameters = [] + special = [] + argnames = list(node.argnames) + if node.kwargs: + special.append(ExcessKeywordArguments(node, argnames[-1])) + argnames.pop() + if node.varargs: + special.append(ExcessPositionalArguments(node, argnames[-1])) + argnames.pop() + defaults = list(node.defaults) + defaults = [None] * (len(argnames) - len(defaults)) + defaults + for argname, default in zip(argnames, defaults): + parameter = Parameter(node, argname) + if default: + default_text = self.token_parser.default(node.lineno) + parameter.append(Default(node, default_text)) + parameters.append(parameter) + if parameters or special: + special.reverse() + parameters.extend(special) + parameter_list = ParameterList(node) + parameter_list.extend(parameters) + self.function.append(parameter_list) class Node: @@ -395,6 +432,16 @@ class Attribute(Node): return Node.attlist(self, name=self.name) +class AttributeTuple(Node): + + def __init__(self, node, names): + Node.__init__(self, node) + self.names = names + + def attlist(self): + return Node.attlist(self, names=' '.join(self.names)) + + class Expression(Node): def __init__(self, node, text): @@ -404,40 +451,98 @@ class Expression(Node): def __str__(self, indent=' ', level=0): prefix = indent * (level + 1) return '%s%s%s\n' % (Node.__str__(self, indent, level), - prefix, self.text) + prefix, self.text.encode('unicode-escape')) -class TokenReader: +class Function(Attribute): pass + + +class ParameterList(Node): pass + + +class Parameter(Attribute): pass + + +class ExcessPositionalArguments(Parameter): pass + + +class ExcessKeywordArguments(Parameter): pass + + +class Default(Expression): pass + + +class TokenParser: def __init__(self, text): - self.text = text - self.lines = text.splitlines(1) + self.text = text + '\n\n' + self.lines = self.text.splitlines(1) self.generator = tokenize.generate_tokens(iter(self.lines).next) + self.next() def __iter__(self): return self def next(self): - token = self.generator.next() - self.type, self.string, self.start, self.end, self.line = token - return token + self.token = self.generator.next() + self.type, self.string, self.start, self.end, self.line = self.token + return self.token def goto_line(self, lineno): - for token in self: - if self.start[0] >= lineno: - return token - else: - raise IndexError + while self.start[0] < lineno: + self.next() + return token - def rhs(self, name, lineno): + def rhs(self, lineno): + """ + Return a whitespace-normalized expression string from the right-hand + side of an assignment at line `lineno`. + """ self.goto_line(lineno) - while self.start[0] == lineno: - if self.type == token.OP and self.string == '=': - break + while self.string != '=': self.next() - else: - raise IndexError - + while self.type != token.NEWLINE and self.string != ';': + append = 1 + append_ws = 1 + del_ws = 0 + if self.string == '=': + start_row, start_col = self.end + tokens = [] + last_type = None + last_string = None + backquote = 0 + append = 0 + elif self.string == '.': + del_ws = 1 + append_ws = 0 + elif self.string in ('(', '[', '{'): + append_ws = 0 + if self.string in '([' and (last_type == token.NAME or + last_string in (')', ']', '}')): + del_ws = 1 + elif self.string in (')', ']', '}', ':', ','): + del_ws = 1 + elif self.string == '`': + if backquote: + del_ws = 1 + else: + append_ws = 0 + backquote = not backquote + elif self.type == tokenize.NL: + append = 0 + if append: + if del_ws and tokens and tokens[-1] == ' ': + del tokens[-1] + tokens.append(self.string) + last_type = self.type + last_string = self.string + if append_ws: + tokens.append(' ') + self.next() + self.next() + text = ''.join(tokens) + return text.strip() + def trim_docstring(text): """ -- cgit v1.2.1 From 173c7a539ca4e2b3a9f8c3a06f67d4a868fd850c Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 18 Dec 2002 01:44:49 +0000 Subject: More progress; functions done. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1027 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 197 +++++++++++++++++++++++++------- 1 file changed, 157 insertions(+), 40 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index cbca876a7..8dcf432b2 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -163,6 +163,14 @@ It's for exactly this reason: ... but it goes past docstring processing. It's also important to keep style decisions and tool-specific data transforms out of this module parser. + +Issues +====== + +* At what point should namespaces be computed? Should they be part of the + basic AST produced by the ASTVisitor walk, or generated by another tree + traversal? + """ __docformat__ = 'reStructuredText' @@ -174,7 +182,7 @@ import tokenize import token from compiler.consts import OP_ASSIGN from compiler.visitor import ASTVisitor -from types import StringType, UnicodeType +from types import StringType, UnicodeType, TupleType def parse_module(module_text, filename): @@ -304,7 +312,13 @@ class AttributeVisitor(BaseVisitor): class FunctionVisitor(DocstringVisitor): + in_function = 0 + def visitFunction(self, node): + if self.in_function: + # Don't bother with nested function definitions. + return + self.in_function = 1 self.function = function = Function(node, node.name) if node.doc is not None: function.append(Docstring(node, node.doc)) @@ -326,11 +340,17 @@ class FunctionVisitor(DocstringVisitor): argnames.pop() defaults = list(node.defaults) defaults = [None] * (len(argnames) - len(defaults)) + defaults + function_parameters = self.token_parser.function_parameters( + node.lineno) + #print >>sys.stderr, function_parameters for argname, default in zip(argnames, defaults): - parameter = Parameter(node, argname) + if type(argname) is TupleType: + parameter = ParameterTuple(node, argname) + argname = normalize_parameter_name(argname) + else: + parameter = Parameter(node, argname) if default: - default_text = self.token_parser.default(node.lineno) - parameter.append(Default(node, default_text)) + parameter.append(Default(node, function_parameters[argname])) parameters.append(parameter) if parameters or special: special.reverse() @@ -463,6 +483,12 @@ class ParameterList(Node): pass class Parameter(Attribute): pass +class ParameterTuple(AttributeTuple): + + def attlist(self): + return Node.attlist(self, names=normalize_parameter_name(self.names)) + + class ExcessPositionalArguments(Parameter): pass @@ -502,47 +528,129 @@ class TokenParser: while self.string != '=': self.next() while self.type != token.NEWLINE and self.string != ';': - append = 1 - append_ws = 1 - del_ws = 0 if self.string == '=': - start_row, start_col = self.end - tokens = [] - last_type = None - last_string = None - backquote = 0 - append = 0 - elif self.string == '.': - del_ws = 1 - append_ws = 0 - elif self.string in ('(', '[', '{'): - append_ws = 0 - if self.string in '([' and (last_type == token.NAME or - last_string in (')', ']', '}')): - del_ws = 1 - elif self.string in (')', ']', '}', ':', ','): - del_ws = 1 - elif self.string == '`': - if backquote: - del_ws = 1 - else: - append_ws = 0 - backquote = not backquote - elif self.type == tokenize.NL: - append = 0 - if append: - if del_ws and tokens and tokens[-1] == ' ': - del tokens[-1] - tokens.append(self.string) - last_type = self.type - last_string = self.string - if append_ws: - tokens.append(' ') + self.tokens = [] + self.stack = [] + self._type = None + self._string = None + self._backquote = 0 + else: + self.note_token() self.next() self.next() - text = ''.join(tokens) + text = ''.join(self.tokens) return text.strip() + openers = {')': '(', ']': '[', '}': '{'} + + def note_token(self): + append = 1 + append_ws = 1 + del_ws = 0 + if self.string == '.': + del_ws = 1 + append_ws = 0 + elif self.string in ('(', '[', '{'): + append_ws = 0 + if self.string in '([' and (self._type == token.NAME or + self._string in (')', ']', '}')): + del_ws = 1 + self.stack.append(self.string) + elif self.string in (')', ']', '}'): + del_ws = 1 + assert self.stack[-1] == self.openers[self.string] + self.stack.pop() + elif self.string in (':', ','): + del_ws = 1 + elif self.string == '`': + if self._backquote: + del_ws = 1 + assert self.stack[-1] == self.string + self.stack.pop() + else: + append_ws = 0 + self.stack.append(self.string) + self._backquote = not self._backquote + elif self.type == tokenize.NL: + append = 0 + if append: + if del_ws and self.tokens and self.tokens[-1] == ' ': + del self.tokens[-1] + self.tokens.append(self.string) + self._type = self.type + self._string = self.string + if append_ws: + self.tokens.append(' ') + + def function_parameters(self, lineno): + """ + Return a dictionary mapping parameters to defaults + (whitespace-normalized strings). + """ + self.goto_line(lineno) + while self.string != 'def': + self.next() + while self.string != '(': + self.next() + name = None + default = None + parameter_tuple = None + self.tokens = [] + parameters = {} + self.stack = [self.string] + self.next() + while 1: + if len(self.stack) == 1: + if parameter_tuple: + # Just encountered ")". + #print >>sys.stderr, 'parameter_tuple: %r' % self.tokens + name = ''.join(self.tokens).strip() + self.tokens = [] + parameter_tuple = None + if self.string in (')', ','): + if name: + if self.tokens: + default_text = ''.join(self.tokens).strip() + else: + default_text = None + parameters[name] = default_text + self.tokens = [] + name = None + default = None + if self.string == ')': + break + elif self.type == token.NAME: + if name and default: + self.note_token() + else: + assert name is None, ( + 'token=%r name=%r parameters=%r stack=%r' + % (self.token, name, parameters, self.stack)) + name = self.string + #print >>sys.stderr, 'name=%r' % name + elif self.string == '=': + assert name is not None, 'token=%r' % (self.token,) + assert default is None, 'token=%r' % (self.token,) + assert self.tokens == [], 'token=%r' % (self.token,) + default = 1 + self._type = None + self._string = None + self._backquote = 0 + elif name: + self.note_token() + elif self.string == '(': + parameter_tuple = 1 + self._type = None + self._string = None + self._backquote = 0 + self.note_token() + else: # ignore these tokens: + assert self.string in ('*', '**', '\n'), ( + 'token=%r' % (self.token,)) + else: + self.note_token() + self.next() + return parameters def trim_docstring(text): """ @@ -573,3 +681,12 @@ def trim_docstring(text): trimmed.pop(0) # Return a single string: return '\n'.join(trimmed) + +def normalize_parameter_name(name): + """ + Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'`` + """ + if type(name) is TupleType: + return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name]) + else: + return name -- cgit v1.2.1 From 34762d707e5fbe502b302027e4318c8301d7a34d Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 19 Dec 2002 01:08:01 +0000 Subject: Added classes & methods. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1032 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 464 +++++++++++++++++++------------- 1 file changed, 277 insertions(+), 187 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 8dcf432b2..dee7810ad 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -7,17 +7,11 @@ """ Parser for Python modules. -Ideas: - -* Tokenize the module in parallel to extract initial values, comments, etc. - -* Merge the compiler & tokenize output such that the raw text hangs off of - nodes. Useful for assignment expressions (RHS). - -What I'd like to do is to take a module, read in the text, run it through the -module parser (using compiler.py and tokenize.py) and produce a high-level AST -full of nodes that are interesting from an auto-documentation standpoint. For -example, given this module (x.py):: +The `parse_module()` function takes a module's text and file name, runs it +through the module parser (using compiler.py and tokenize.py) and produces a +"module documentation tree": a high-level AST full of nodes that are +interesting from an auto-documentation standpoint. For example, given this +module (x.py):: # comment @@ -54,75 +48,77 @@ example, given this module (x.py):: f.function_attribute = 1 '''f.function_attribute's docstring''' -The module parser should produce a high-level AST, something like this:: +The module parser will produce this module documentation tree:: - + comment - + Docstring - (I'll leave out the lineno's) + Additional docstring - - + + 'reStructuredText' - - + + 1 - + Attribute docstring - - + + C's docstring - - + + 1 - + class_attribute's docstring - - - - - - None - + + __init__'s docstring - - - (text * 7 - + ' whaddyaknow') - - class_attribute's docstring - - - + + + + + None + + + (text * 7 + ' whaddyaknow') + + instance_attribute's docstring + + + f's docstring + + # parameter x - - - a*5 + + + a * 5 # parameter y - + # parameter args - - f's docstring - - - 1 - - f.function_attribute's docstring + + + 1 + + f.function_attribute's docstring + +(Comments are not implemented yet.) -compiler.parse() provides most of what's needed for this AST, and "tokenize" -can be used to get the rest. We can determine the line number from the -compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the +compiler.parse() provides most of what's needed for this doctree, and +"tokenize" can be used to get the rest. We can determine the line number from +the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the rest. -The Docutils Python reader component will transform this AST into a -Python-specific doctree, and then a `stylist transform`_ would further -transform it into a generic doctree. Namespaces will have to be compiled for -each of the scopes, but I'm not certain at what stage of processing. +The Docutils Python reader component will transform this module doctree into a +Python-specific Docutils doctree, and then a `stylist transform`_ will +further transform it into a generic doctree. Namespaces will have to be +compiled for each of the scopes, but I'm not certain at what stage of +processing. It's very important to keep all docstring processing out of this, so that it's a completely generic and not tool-specific. @@ -131,7 +127,7 @@ a completely generic and not tool-specific. > generic doctree? Or, even from the AST to the final output? I want the docutils.readers.python.moduleparser.parse_module() function to -produce a standard documentation-oriented AST that can be used by any tool. +produce a standard documentation-oriented tree that can be used by any tool. We can develop it together without having to compromise on the rest of our design (i.e., HappyDoc doesn't have to be made to work like Docutils, and vice-versa). It would be a higher-level version of what compiler.py provides. @@ -171,6 +167,17 @@ Issues basic AST produced by the ASTVisitor walk, or generated by another tree traversal? +* At what point should a distinction be made between local variables & + instance attributes in __init__ methods? + +* Docstrings are getting their lineno from their parents. Should the + TokenParser find the real line no's? + +* Comments: include them? How and when? Only full-line comments, or + parameter comments too? (See function "f" above for an example.) + +* Module could use more docstrings & refactoring in places. + """ __docformat__ = 'reStructuredText' @@ -186,6 +193,7 @@ from types import StringType, UnicodeType, TupleType def parse_module(module_text, filename): + """Return a module documentation tree from `module_text`.""" ast = compiler.parse(module_text) token_parser = TokenParser(module_text) visitor = ModuleVisitor(filename, token_parser) @@ -193,6 +201,161 @@ def parse_module(module_text, filename): return visitor.module +class Node: + + def __init__(self, node): + self.children = [] + """List of child nodes.""" + + self.lineno = node.lineno + """Line number of this node (or ``None``).""" + + def __str__(self, indent=' ', level=0): + return ''.join(['%s%s\n' % (indent * level, repr(self))] + + [child.__str__(indent, level+1) + for child in self.children]) + + def __repr__(self): + parts = [self.__class__.__name__] + for name, value in self.attlist(): + parts.append('%s="%s"' % (name, value)) + return '<%s>' % ' '.join(parts) + + def attlist(self, **atts): + if self.lineno is not None: + atts['lineno'] = self.lineno + attlist = atts.items() + attlist.sort() + return attlist + + def append(self, node): + self.children.append(node) + + def extend(self, node_list): + self.children.extend(node_list) + + +class Module(Node): + + def __init__(self, node, filename): + Node.__init__(self, node) + self.filename = filename + + def attlist(self): + return Node.attlist(self, filename=self.filename) + + +class Docstring(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = trim_docstring(text) + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + text = '\n'.join([prefix + line for line in self.text.splitlines()]) + return Node.__str__(self, indent, level) + text + '\n' + + +class Import(Node): + + def __init__(self, node, names, from_name=None): + Node.__init__(self, node) + self.names = names + self.from_name = from_name + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + lines = [] + for name, as in self.names: + if as: + lines.append('%s%s as %s' % (prefix, name, as)) + else: + lines.append('%s%s' % (prefix, name)) + text = '\n'.join(lines) + return Node.__str__(self, indent, level) + text + '\n' + + def attlist(self): + if self.from_name: + atts = {'from': self.from_name} + else: + atts = {} + return Node.attlist(self, **atts) + + +class Attribute(Node): + + def __init__(self, node, name): + Node.__init__(self, node) + self.name = name + + def attlist(self): + return Node.attlist(self, name=self.name) + + +class AttributeTuple(Node): + + def __init__(self, node, names): + Node.__init__(self, node) + self.names = names + + def attlist(self): + return Node.attlist(self, names=' '.join(self.names)) + + +class Expression(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = text + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + return '%s%s%s\n' % (Node.__str__(self, indent, level), + prefix, self.text.encode('unicode-escape')) + + +class Function(Attribute): pass + + +class ParameterList(Node): pass + + +class Parameter(Attribute): pass + + +class ParameterTuple(AttributeTuple): + + def attlist(self): + return Node.attlist(self, names=normalize_parameter_name(self.names)) + + +class ExcessPositionalArguments(Parameter): pass + + +class ExcessKeywordArguments(Parameter): pass + + +class Default(Expression): pass + + +class Class(Node): + + def __init__(self, node, name, bases=None): + Node.__init__(self, node) + self.name = name + self.bases = bases or [] + + def attlist(self): + atts = {'name': self.name} + if self.bases: + atts['bases'] = ' '.join(self.bases) + return Node.attlist(self, **atts) + + +class Method(Function): pass + + class BaseVisitor(ASTVisitor): def __init__(self, token_parser): @@ -271,6 +434,11 @@ class ModuleVisitor(AssignmentVisitor): compiler.walk(node, visitor, walker=visitor) self.context[-1].append(visitor.function) + def visitClass(self, node): + visitor = ClassVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.klass) + class AttributeVisitor(BaseVisitor): @@ -313,13 +481,15 @@ class AttributeVisitor(BaseVisitor): class FunctionVisitor(DocstringVisitor): in_function = 0 + function_class = Function def visitFunction(self, node): if self.in_function: + self.documentable = None # Don't bother with nested function definitions. return self.in_function = 1 - self.function = function = Function(node, node.name) + self.function = function = self.function_class(node, node.name) if node.doc is not None: function.append(Docstring(node, node.doc)) self.context.append(function) @@ -360,142 +530,61 @@ class FunctionVisitor(DocstringVisitor): self.function.append(parameter_list) -class Node: - - def __init__(self, node): - self.children = [] - """List of child nodes.""" - - self.lineno = node.lineno - """Line number of this node (or ``None``).""" - - def __str__(self, indent=' ', level=0): - return ''.join(['%s%s\n' % (indent * level, repr(self))] + - [child.__str__(indent, level+1) - for child in self.children]) +class ClassVisitor(AssignmentVisitor): - def __repr__(self): - parts = [self.__class__.__name__] - for name, value in self.attlist(): - parts.append('%s="%s"' % (name, value)) - return '<%s>' % ' '.join(parts) + in_class = 0 - def attlist(self, **atts): - if self.lineno is not None: - atts['lineno'] = self.lineno - attlist = atts.items() - attlist.sort() - return attlist - - def append(self, node): - self.children.append(node) - - def extend(self, node_list): - self.children.extend(node_list) - - -class Module(Node): - - def __init__(self, node, filename): - Node.__init__(self, node) - self.filename = filename - - def attlist(self): - return Node.attlist(self, filename=self.filename) - - -class Docstring(Node): - - def __init__(self, node, text): - Node.__init__(self, node) - self.text = trim_docstring(text) - - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - text = '\n'.join([prefix + line for line in self.text.splitlines()]) - return Node.__str__(self, indent, level) + text + '\n' - - -class Import(Node): - - def __init__(self, node, names, from_name=None): - Node.__init__(self, node) - self.names = names - self.from_name = from_name + def __init__(self, token_parser): + AssignmentVisitor.__init__(self, token_parser) + self.bases = [] - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - lines = [] - for name, as in self.names: - if as: - lines.append('%s%s as %s' % (prefix, name, as)) - else: - lines.append('%s%s' % (prefix, name)) - text = '\n'.join(lines) - return Node.__str__(self, indent, level) + text + '\n' + def visitClass(self, node): + if self.in_class: + self.documentable = None + # Don't bother with nested class definitions. + return + self.in_class = 1 + #import mypdb as pdb + #pdb.set_trace() + for base in node.bases: + self.visit(base) + self.klass = klass = Class(node, node.name, self.bases) + if node.doc is not None: + klass.append(Docstring(node, node.doc)) + self.context.append(klass) + self.documentable = klass + self.visit(node.code) + self.context.pop() - def attlist(self): - if self.from_name: - atts = {'from': self.from_name} + def visitGetattr(self, node, suffix=None): + if suffix: + name = node.attrname + '.' + suffix else: - atts = {} - return Node.attlist(self, **atts) - - -class Attribute(Node): - - def __init__(self, node, name): - Node.__init__(self, node) - self.name = name - - def attlist(self): - return Node.attlist(self, name=self.name) - - -class AttributeTuple(Node): - - def __init__(self, node, names): - Node.__init__(self, node) - self.names = names + name = node.attrname + self.default_visit(node, name) - def attlist(self): - return Node.attlist(self, names=' '.join(self.names)) - - -class Expression(Node): - - def __init__(self, node, text): - Node.__init__(self, node) - self.text = text - - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - return '%s%s%s\n' % (Node.__str__(self, indent, level), - prefix, self.text.encode('unicode-escape')) - - -class Function(Attribute): pass - - -class ParameterList(Node): pass - - -class Parameter(Attribute): pass - - -class ParameterTuple(AttributeTuple): - - def attlist(self): - return Node.attlist(self, names=normalize_parameter_name(self.names)) + def visitName(self, node, suffix=None): + if suffix: + name = node.name + '.' + suffix + else: + name = node.name + self.bases.append(name) + def visitFunction(self, node): + if node.name == '__init__': + visitor = InitMethodVisitor(self.token_parser) + else: + visitor = MethodVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.function) -class ExcessPositionalArguments(Parameter): pass +class MethodVisitor(FunctionVisitor): -class ExcessKeywordArguments(Parameter): pass + function_class = Method -class Default(Expression): pass +class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass class TokenParser: @@ -645,7 +734,8 @@ class TokenParser: self._backquote = 0 self.note_token() else: # ignore these tokens: - assert self.string in ('*', '**', '\n'), ( + assert (self.string in ('*', '**', '\n') + or self.type == tokenize.COMMENT), ( 'token=%r' % (self.token,)) else: self.note_token() @@ -659,7 +749,7 @@ def trim_docstring(text): See PEP 257. """ if not text: - return '' + return text # Convert tabs to spaces (following the normal Python rules) # and split into a list of lines: lines = text.expandtabs().splitlines() -- cgit v1.2.1 From 6c7003a050d919f5b10d85b28c0dcafc45f4686f Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 19 Dec 2002 04:40:45 +0000 Subject: fixed RHS parse bug, found by Richard Jones git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1035 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index dee7810ad..15f05a869 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -616,8 +616,9 @@ class TokenParser: self.goto_line(lineno) while self.string != '=': self.next() + self.stack = None while self.type != token.NEWLINE and self.string != ';': - if self.string == '=': + if self.string == '=' and not self.stack: self.tokens = [] self.stack = [] self._type = None -- cgit v1.2.1 From acb0feec0ebec5110c719a661b253cd37bf9a721 Mon Sep 17 00:00:00 2001 From: goodger Date: Sun, 29 Dec 2002 18:37:18 +0000 Subject: refactored a bit git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1045 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 81 ++++++++++++++++----------------- 1 file changed, 39 insertions(+), 42 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 15f05a869..2262ddbd5 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -235,6 +235,18 @@ class Node: self.children.extend(node_list) +class TextNode(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = trim_docstring(text) + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + text = '\n'.join([prefix + line for line in self.text.splitlines()]) + return Node.__str__(self, indent, level) + text + '\n' + + class Module(Node): def __init__(self, node, filename): @@ -245,16 +257,10 @@ class Module(Node): return Node.attlist(self, filename=self.filename) -class Docstring(Node): +class Docstring(TextNode): pass - def __init__(self, node, text): - Node.__init__(self, node) - self.text = trim_docstring(text) - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - text = '\n'.join([prefix + line for line in self.text.splitlines()]) - return Node.__str__(self, indent, level) + text + '\n' +class Comment(TextNode): pass class Import(Node): @@ -303,11 +309,7 @@ class AttributeTuple(Node): return Node.attlist(self, names=' '.join(self.names)) -class Expression(Node): - - def __init__(self, node, text): - Node.__init__(self, node) - self.text = text +class Expression(TextNode): def __str__(self, indent=' ', level=0): prefix = indent * (level + 1) @@ -631,46 +633,40 @@ class TokenParser: text = ''.join(self.tokens) return text.strip() - openers = {')': '(', ']': '[', '}': '{'} + closers = {')': '(', ']': '[', '}': '{'} + openers = {'(': 1, '[': 1, '{': 1} + del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1} + no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1} def note_token(self): - append = 1 - append_ws = 1 - del_ws = 0 - if self.string == '.': - del_ws = 1 - append_ws = 0 - elif self.string in ('(', '[', '{'): - append_ws = 0 - if self.string in '([' and (self._type == token.NAME or - self._string in (')', ']', '}')): - del_ws = 1 + if self.type == tokenize.NL: + return + del_ws = self.del_ws_prefix.has_key(self.string) + append_ws = not self.no_ws_suffix.has_key(self.string) + if self.openers.has_key(self.string): self.stack.append(self.string) - elif self.string in (')', ']', '}'): - del_ws = 1 - assert self.stack[-1] == self.openers[self.string] + if (self._type == token.NAME + or self.closers.has_key(self._string)): + del_ws = 1 + elif self.closers.has_key(self.string): + assert self.stack[-1] == self.closers[self.string] self.stack.pop() - elif self.string in (':', ','): - del_ws = 1 elif self.string == '`': if self._backquote: del_ws = 1 - assert self.stack[-1] == self.string + assert self.stack[-1] == '`' self.stack.pop() else: append_ws = 0 - self.stack.append(self.string) + self.stack.append('`') self._backquote = not self._backquote - elif self.type == tokenize.NL: - append = 0 - if append: - if del_ws and self.tokens and self.tokens[-1] == ' ': - del self.tokens[-1] - self.tokens.append(self.string) - self._type = self.type - self._string = self.string - if append_ws: - self.tokens.append(' ') + if del_ws and self.tokens and self.tokens[-1] == ' ': + del self.tokens[-1] + self.tokens.append(self.string) + self._type = self.type + self._string = self.string + if append_ws: + self.tokens.append(' ') def function_parameters(self, lineno): """ @@ -743,6 +739,7 @@ class TokenParser: self.next() return parameters + def trim_docstring(text): """ Trim indentation and blank lines from docstring text & return it. -- cgit v1.2.1 From df2ba997c490308d849dfc5c67f3ed550dd8138c Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 4 Jan 2003 00:18:58 +0000 Subject: docstring git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1060 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 2262ddbd5..9fcd1ec07 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -203,6 +203,10 @@ def parse_module(module_text, filename): class Node: + """ + Base class for module documentation tree nodes. + """ + def __init__(self, node): self.children = [] """List of child nodes.""" -- cgit v1.2.1 From ffa71e84747057982955e2bad3de2f8d3ed97c69 Mon Sep 17 00:00:00 2001 From: goodger Date: Wed, 27 Aug 2003 20:50:43 +0000 Subject: Updated for configuration file reorganization. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1645 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index a346ce32a..5bc832183 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -16,4 +16,6 @@ import docutils.readers class Reader(docutils.readers.Reader): - pass + + config_section = 'python reader' + config_section_dependencies = ('readers',) -- cgit v1.2.1 From 9f31157a98faacf4c2b8f2bbdf2bbbd5078d6093 Mon Sep 17 00:00:00 2001 From: ianbicking Date: Sun, 21 Mar 2004 20:18:22 +0000 Subject: Incomplete changes to the moduleparser python source reader git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1847 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 9fcd1ec07..a425d2738 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -190,6 +190,7 @@ import token from compiler.consts import OP_ASSIGN from compiler.visitor import ASTVisitor from types import StringType, UnicodeType, TupleType +from docutils.readers.python import pynodes def parse_module(module_text, filename): @@ -418,6 +419,7 @@ class ModuleVisitor(AssignmentVisitor): self.module = None def visitModule(self, node): + self.module = module = Module(node, self.filename) if node.doc is not None: module.append(Docstring(node, node.doc)) @@ -782,3 +784,9 @@ def normalize_parameter_name(name): return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name]) else: return name + +if __name__ == '__main__': + import sys + filename = sys.argv[1] + content = open(filename).read() + print parse_module(content, filename) -- cgit v1.2.1 From d1e833c256b95461a2d5af437c4ce16b71345c22 Mon Sep 17 00:00:00 2001 From: ianbicking Date: Mon, 22 Mar 2004 15:11:35 +0000 Subject: Added the pynodes file I missed yesterday. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1858 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/pynodes.py | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 docutils/readers/python/pynodes.py (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/pynodes.py b/docutils/readers/python/pynodes.py new file mode 100644 index 000000000..f246a5dc6 --- /dev/null +++ b/docutils/readers/python/pynodes.py @@ -0,0 +1,80 @@ +#! /usr/bin/env python + +""" +:Author: David Goodger +:Contact: goodger@users.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This module has been placed in the public domain. + +""" + +from docutils import nodes +from docutils.nodes import Element, TextElement, Structural, Inline, Part + + +# ===================== +# Structural Elements +# ===================== + +class package_section(Structural, Element): pass +class module_section(Structural, Element): pass +class class_section(Structural, Element): pass +class method_section(Structural, Element): pass +class function_section(Structural, Element): pass +class module_attribute_section(Structural, Element): pass +class class_attribute_section(Structural, Element): pass +class instance_attribute_section(Structural, Element): pass + +# Structural Support Elements +# --------------------------- + +class inheritance_list(Part, Element): pass +class parameter_list(Part, Element): pass +class parameter_item(Part, Element): pass +class optional_parameters(Part, Element): pass +class parameter_tuple(Part, Element): pass +class parameter_default(Part, TextElement): pass +class initial_value(Part, TextElement): pass +class import_item(Part, TextElement): pass + +# ================= +# Inline Elements +# ================= + +# These elements cannot become references until the second +# pass. Initially, we'll use "reference" or "name". + +class package(Part, Inline, TextElement): pass +class module(Part, Inline, TextElement): pass + + +class inline_class(Part, Inline, TextElement): + + tagname = 'class' + + +class method(Part, Inline, TextElement): pass +class function(Part, Inline, TextElement): pass +class variable(Inline, TextElement): pass +class parameter(Part, Inline, TextElement): pass +class inline_type(Inline, TextElement): + tagname = 'type' +class class_attribute(Part, Inline, TextElement): pass +class module_attribute(Part, Inline, TextElement): pass +class instance_attribute(Part, Inline, TextElement): pass +class exception_class(Inline, TextElement): pass +class warning_class(Inline, TextElement): pass + +# Collect all the classes we've written above +node_class_names = [] +def build_node_class_names(): + for name, var in globals().items(): + if type(var) is types.ClassType \ + and issubclass(var, nodes.Node) \ + and name.lower() == name: + node_class_names.append(var.tagname or name) + +# Register the new node names with GenericNodeVisitor and +# SpecificNodeVisitor: +nodes._add_node_class_names(node_class_names) -- cgit v1.2.1 From db10b42915077db1a6cbb936145d0d803142b12a Mon Sep 17 00:00:00 2001 From: ianbicking Date: Tue, 23 Mar 2004 19:57:14 +0000 Subject: * Bug fixes to python reader * Getting tests up-to-date * Trimming unused nodes from pynodes git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1876 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 6 + docutils/readers/python/moduleparser.py | 343 +++++++++++++------------------- docutils/readers/python/pynodes.py | 81 ++++---- 3 files changed, 190 insertions(+), 240 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 5bc832183..3087f9147 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -13,9 +13,15 @@ __docformat__ = 'reStructuredText' import sys import docutils.readers +from docutils.readers.python import moduleparser class Reader(docutils.readers.Reader): config_section = 'python reader' config_section_dependencies = ('readers',) + + def parse(self): + """Parse `self.input` into a document tree.""" + self.document = document = moduleparser.parse_module(self.input) + document.current_source = document.current_line = None diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index a425d2738..c95d997c8 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -191,6 +191,7 @@ from compiler.consts import OP_ASSIGN from compiler.visitor import ASTVisitor from types import StringType, UnicodeType, TupleType from docutils.readers.python import pynodes +from docutils.nodes import Text def parse_module(module_text, filename): @@ -201,168 +202,6 @@ def parse_module(module_text, filename): compiler.walk(ast, visitor, walker=visitor) return visitor.module - -class Node: - - """ - Base class for module documentation tree nodes. - """ - - def __init__(self, node): - self.children = [] - """List of child nodes.""" - - self.lineno = node.lineno - """Line number of this node (or ``None``).""" - - def __str__(self, indent=' ', level=0): - return ''.join(['%s%s\n' % (indent * level, repr(self))] + - [child.__str__(indent, level+1) - for child in self.children]) - - def __repr__(self): - parts = [self.__class__.__name__] - for name, value in self.attlist(): - parts.append('%s="%s"' % (name, value)) - return '<%s>' % ' '.join(parts) - - def attlist(self, **atts): - if self.lineno is not None: - atts['lineno'] = self.lineno - attlist = atts.items() - attlist.sort() - return attlist - - def append(self, node): - self.children.append(node) - - def extend(self, node_list): - self.children.extend(node_list) - - -class TextNode(Node): - - def __init__(self, node, text): - Node.__init__(self, node) - self.text = trim_docstring(text) - - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - text = '\n'.join([prefix + line for line in self.text.splitlines()]) - return Node.__str__(self, indent, level) + text + '\n' - - -class Module(Node): - - def __init__(self, node, filename): - Node.__init__(self, node) - self.filename = filename - - def attlist(self): - return Node.attlist(self, filename=self.filename) - - -class Docstring(TextNode): pass - - -class Comment(TextNode): pass - - -class Import(Node): - - def __init__(self, node, names, from_name=None): - Node.__init__(self, node) - self.names = names - self.from_name = from_name - - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - lines = [] - for name, as in self.names: - if as: - lines.append('%s%s as %s' % (prefix, name, as)) - else: - lines.append('%s%s' % (prefix, name)) - text = '\n'.join(lines) - return Node.__str__(self, indent, level) + text + '\n' - - def attlist(self): - if self.from_name: - atts = {'from': self.from_name} - else: - atts = {} - return Node.attlist(self, **atts) - - -class Attribute(Node): - - def __init__(self, node, name): - Node.__init__(self, node) - self.name = name - - def attlist(self): - return Node.attlist(self, name=self.name) - - -class AttributeTuple(Node): - - def __init__(self, node, names): - Node.__init__(self, node) - self.names = names - - def attlist(self): - return Node.attlist(self, names=' '.join(self.names)) - - -class Expression(TextNode): - - def __str__(self, indent=' ', level=0): - prefix = indent * (level + 1) - return '%s%s%s\n' % (Node.__str__(self, indent, level), - prefix, self.text.encode('unicode-escape')) - - -class Function(Attribute): pass - - -class ParameterList(Node): pass - - -class Parameter(Attribute): pass - - -class ParameterTuple(AttributeTuple): - - def attlist(self): - return Node.attlist(self, names=normalize_parameter_name(self.names)) - - -class ExcessPositionalArguments(Parameter): pass - - -class ExcessKeywordArguments(Parameter): pass - - -class Default(Expression): pass - - -class Class(Node): - - def __init__(self, node, name, bases=None): - Node.__init__(self, node) - self.name = name - self.bases = bases or [] - - def attlist(self): - atts = {'name': self.name} - if self.bases: - atts['bases'] = ' '.join(self.bases) - return Node.attlist(self, **atts) - - -class Method(Function): pass - - class BaseVisitor(ASTVisitor): def __init__(self, token_parser): @@ -390,7 +229,7 @@ class DocstringVisitor(BaseVisitor): def visitConst(self, node): if self.documentable: if type(node.value) in (StringType, UnicodeType): - self.documentable.append(Docstring(node, node.value)) + self.documentable.append(make_docstring(node.value, node.lineno)) else: self.documentable = None @@ -419,26 +258,28 @@ class ModuleVisitor(AssignmentVisitor): self.module = None def visitModule(self, node): - - self.module = module = Module(node, self.filename) - if node.doc is not None: - module.append(Docstring(node, node.doc)) + self.module = module = pynodes.module_section() + module['filename'] = self.filename + append_docstring(module, node.doc, node.lineno) self.context.append(module) self.documentable = module self.visit(node.node) self.context.pop() def visitImport(self, node): - self.context[-1].append(Import(node, node.names)) + self.context[-1] += make_import_group(names=node.names, + lineno=node.lineno) self.documentable = None def visitFrom(self, node): self.context[-1].append( - Import(node, node.names, from_name=node.modname)) + make_import_group(names=node.names, from_name=node.modname, + lineno=node.lineno)) self.documentable = None def visitFunction(self, node): - visitor = FunctionVisitor(self.token_parser) + visitor = FunctionVisitor(self.token_parser, + function_class=pynodes.function_section) compiler.walk(node, visitor, walker=visitor) self.context[-1].append(visitor.function) @@ -452,29 +293,32 @@ class AttributeVisitor(BaseVisitor): def __init__(self, token_parser): BaseVisitor.__init__(self, token_parser) - self.attributes = [] + self.attributes = pynodes.class_attribute_section() def visitAssign(self, node): # Don't visit the expression itself, just the attribute nodes: for child in node.nodes: self.dispatch(child) expression_text = self.token_parser.rhs(node.lineno) - expression = Expression(node, expression_text) + expression = pynodes.expression_value() + expression.append(Text(expression_text)) for attribute in self.attributes: attribute.append(expression) def visitAssName(self, node): - self.attributes.append(Attribute(node, node.name)) + self.attributes.append(make_attribute(node.name, + lineno=node.lineno)) def visitAssTuple(self, node): attributes = self.attributes self.attributes = [] self.default_visit(node) - names = [attribute.name for attribute in self.attributes] - att_tuple = AttributeTuple(node, names) - att_tuple.lineno = self.attributes[0].lineno + n = pynodes.attribute_tuple() + n.extend(self.attributes) + n['lineno'] = self.attributes[0]['lineno'] + attributes.append(n) self.attributes = attributes - self.attributes.append(att_tuple) + #self.attributes.append(att_tuple) def visitAssAttr(self, node): self.default_visit(node, node.attrname) @@ -483,13 +327,17 @@ class AttributeVisitor(BaseVisitor): self.default_visit(node, node.attrname + '.' + suffix) def visitName(self, node, suffix): - self.attributes.append(Attribute(node, node.name + '.' + suffix)) + self.attributes.append(make_attribute(node.name + '.' + suffix, + lineno=node.lineno)) class FunctionVisitor(DocstringVisitor): in_function = 0 - function_class = Function + + def __init__(self, token_parser, function_class): + DocstringVisitor.__init__(self, token_parser) + self.function_class = function_class def visitFunction(self, node): if self.in_function: @@ -497,9 +345,11 @@ class FunctionVisitor(DocstringVisitor): # Don't bother with nested function definitions. return self.in_function = 1 - self.function = function = self.function_class(node, node.name) - if node.doc is not None: - function.append(Docstring(node, node.doc)) + self.function = function = make_function_like_section( + name=node.name, + lineno=node.lineno, + doc=node.doc, + function_class=self.function_class) self.context.append(function) self.documentable = function self.parse_parameter_list(node) @@ -511,10 +361,11 @@ class FunctionVisitor(DocstringVisitor): special = [] argnames = list(node.argnames) if node.kwargs: - special.append(ExcessKeywordArguments(node, argnames[-1])) + special.append(make_parameter(argnames[-1], excess_keyword=True)) argnames.pop() if node.varargs: - special.append(ExcessPositionalArguments(node, argnames[-1])) + special.append(make_parameter(argnames[-1], + excess_positional=True)) argnames.pop() defaults = list(node.defaults) defaults = [None] * (len(argnames) - len(defaults)) + defaults @@ -523,17 +374,21 @@ class FunctionVisitor(DocstringVisitor): #print >>sys.stderr, function_parameters for argname, default in zip(argnames, defaults): if type(argname) is TupleType: - parameter = ParameterTuple(node, argname) + parameter = pynodes.parameter_tuple() + for tuplearg in argname: + parameter.append(make_parameter(tuplearg)) argname = normalize_parameter_name(argname) else: - parameter = Parameter(node, argname) + parameter = make_parameter(argname) if default: - parameter.append(Default(node, function_parameters[argname])) + n_default = pynodes.parameter_default() + n_default.append(Text(function_parameters[argname])) + parameter.append(n_default) parameters.append(parameter) if parameters or special: special.reverse() parameters.extend(special) - parameter_list = ParameterList(node) + parameter_list = pynodes.parameter_list() parameter_list.extend(parameters) self.function.append(parameter_list) @@ -556,9 +411,9 @@ class ClassVisitor(AssignmentVisitor): #pdb.set_trace() for base in node.bases: self.visit(base) - self.klass = klass = Class(node, node.name, self.bases) - if node.doc is not None: - klass.append(Docstring(node, node.doc)) + self.klass = klass = make_class_section(node.name, self.bases, + doc=node.doc, + lineno=node.lineno) self.context.append(klass) self.documentable = klass self.visit(node.code) @@ -580,19 +435,17 @@ class ClassVisitor(AssignmentVisitor): def visitFunction(self, node): if node.name == '__init__': - visitor = InitMethodVisitor(self.token_parser) + visitor = InitMethodVisitor(self.token_parser, + function_class=pynodes.method_section) + compiler.walk(node, visitor, walker=visitor) else: - visitor = MethodVisitor(self.token_parser) - compiler.walk(node, visitor, walker=visitor) + visitor = FunctionVisitor(self.token_parser, + function_class=pynodes.method_section) + compiler.walk(node, visitor, walker=visitor) self.context[-1].append(visitor.function) -class MethodVisitor(FunctionVisitor): - - function_class = Method - - -class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass +class InitMethodVisitor(FunctionVisitor, AssignmentVisitor): pass class TokenParser: @@ -746,6 +599,81 @@ class TokenParser: return parameters +def make_docstring(doc, lineno): + n = pynodes.docstring() + if lineno: + # Really, only module docstrings don't have a line + # (@@: but maybe they should) + n['lineno'] = lineno + n.append(Text(doc)) + return n + +def append_docstring(node, doc, lineno): + if doc: + node.append(make_docstring(doc, lineno)) + +def make_class_section(name, bases, lineno, doc): + n = pynodes.class_section() + n['lineno'] = lineno + n.append(make_object_name(name)) + for base in bases: + b = pynodes.class_base() + b.append(make_object_name(base)) + n.append(b) + append_docstring(n, doc, lineno) + return n + +def make_object_name(name): + n = pynodes.object_name() + n.append(Text(name)) + return n + +def make_function_like_section(name, lineno, doc, function_class): + n = function_class() + n['lineno'] = lineno + n.append(make_object_name(name)) + append_docstring(n, doc, lineno) + return n + +def make_import_group(names, lineno, from_name=None): + n = pynodes.import_group() + n['lineno'] = lineno + if from_name: + n_from = pynodes.import_from() + n_from.append(Text(from_name)) + n.append(n_from) + for name, alias in names: + n_name = pynodes.import_name() + n_name.append(Text(name)) + if alias: + n_alias = pynodes.import_alias() + n_alias.append(Text(alias)) + n_name.append(n_alias) + n.append(n_name) + return n + +def make_class_attribute(name, lineno): + n = pynodes.class_attribute() + n['lineno'] = lineno + n.append(Text(name)) + return n + +def make_attribute(name, lineno): + n = pynodes.attribute() + n['lineno'] = lineno + n.append(make_object_name(name)) + return n + +def make_parameter(name, excess_keyword=False, excess_positional=False): + n = pynodes.parameter() + n.append(make_object_name(name)) + assert not excess_keyword or not excess_positional + if excess_keyword: + n['excess_keyword'] = 1 + if excess_positional: + n['excess_positional'] = 1 + return n + def trim_docstring(text): """ Trim indentation and blank lines from docstring text & return it. @@ -787,6 +715,15 @@ def normalize_parameter_name(name): if __name__ == '__main__': import sys - filename = sys.argv[1] - content = open(filename).read() - print parse_module(content, filename) + args = sys.argv[1:] + if args[0] == '-v': + filename = args[1] + module_text = open(filename).read() + ast = compiler.parse(module_text) + visitor = compiler.visitor.ExampleASTVisitor() + compiler.walk(ast, visitor, walker=visitor, verbose=1) + else: + filename = args[0] + content = open(filename).read() + print parse_module(content, filename).pformat() + diff --git a/docutils/readers/python/pynodes.py b/docutils/readers/python/pynodes.py index f246a5dc6..40db6975e 100644 --- a/docutils/readers/python/pynodes.py +++ b/docutils/readers/python/pynodes.py @@ -10,33 +10,39 @@ """ from docutils import nodes -from docutils.nodes import Element, TextElement, Structural, Inline, Part +from docutils.nodes import Element, TextElement, Structural, Inline, Part, \ + Text +import types +# This is the parent class of all the other pynode classes: +class PythonStructural(Structural): pass # ===================== # Structural Elements # ===================== -class package_section(Structural, Element): pass -class module_section(Structural, Element): pass -class class_section(Structural, Element): pass -class method_section(Structural, Element): pass -class function_section(Structural, Element): pass -class module_attribute_section(Structural, Element): pass -class class_attribute_section(Structural, Element): pass -class instance_attribute_section(Structural, Element): pass +class module_section(PythonStructural, Element): pass +class class_section(PythonStructural, Element): pass +class class_base(PythonStructural, Element): pass +class method_section(PythonStructural, Element): pass +class attribute(PythonStructural, Element): pass +class function_section(PythonStructural, Element): pass +class class_attribute_section(PythonStructural, Element): pass +class class_attribute(PythonStructural, Element): pass +class expression_value(PythonStructural, Element): pass +class attribute(PythonStructural, Element): pass # Structural Support Elements # --------------------------- -class inheritance_list(Part, Element): pass -class parameter_list(Part, Element): pass -class parameter_item(Part, Element): pass -class optional_parameters(Part, Element): pass -class parameter_tuple(Part, Element): pass -class parameter_default(Part, TextElement): pass -class initial_value(Part, TextElement): pass -class import_item(Part, TextElement): pass +class parameter_list(PythonStructural, Element): pass +class parameter_tuple(PythonStructural, Element): pass +class parameter_default(PythonStructural, TextElement): pass +class import_group(PythonStructural, TextElement): pass +class import_from(PythonStructural, TextElement): pass +class import_name(PythonStructural, TextElement): pass +class import_alias(PythonStructural, TextElement): pass +class docstring(PythonStructural, Element): pass # ================= # Inline Elements @@ -45,34 +51,35 @@ class import_item(Part, TextElement): pass # These elements cannot become references until the second # pass. Initially, we'll use "reference" or "name". -class package(Part, Inline, TextElement): pass -class module(Part, Inline, TextElement): pass +class object_name(PythonStructural, TextElement): pass +class parameter_list(PythonStructural, TextElement): pass +class parameter(PythonStructural, TextElement): pass +class parameter_default(PythonStructural, TextElement): pass +class class_attribute(PythonStructural, TextElement): pass +class attribute_tuple(PythonStructural, TextElement): pass +# ================= +# Unused Elements +# ================= -class inline_class(Part, Inline, TextElement): - - tagname = 'class' - +# These were part of the model, and maybe should be in the future, but +# aren't now. +#class package_section(PythonStructural, Element): pass +#class module_attribute_section(PythonStructural, Element): pass +#class instance_attribute_section(PythonStructural, Element): pass +#class module_attribute(PythonStructural, TextElement): pass +#class instance_attribute(PythonStructural, TextElement): pass +#class exception_class(PythonStructural, TextElement): pass +#class warning_class(PythonStructural, TextElement): pass -class method(Part, Inline, TextElement): pass -class function(Part, Inline, TextElement): pass -class variable(Inline, TextElement): pass -class parameter(Part, Inline, TextElement): pass -class inline_type(Inline, TextElement): - tagname = 'type' -class class_attribute(Part, Inline, TextElement): pass -class module_attribute(Part, Inline, TextElement): pass -class instance_attribute(Part, Inline, TextElement): pass -class exception_class(Inline, TextElement): pass -class warning_class(Inline, TextElement): pass # Collect all the classes we've written above node_class_names = [] def build_node_class_names(): for name, var in globals().items(): - if type(var) is types.ClassType \ - and issubclass(var, nodes.Node) \ - and name.lower() == name: + if (type(var) is types.ClassType + and issubclass(var, PythonStructural) \ + and name.lower() == name): node_class_names.append(var.tagname or name) # Register the new node names with GenericNodeVisitor and -- cgit v1.2.1 From c75f5e75d567181ab0267cf4224a3cea59adc53f Mon Sep 17 00:00:00 2001 From: ianbicking Date: Tue, 23 Mar 2004 23:21:11 +0000 Subject: Reader parses docstrings (according to __docformat__) and produces full output. The reader should thus be "done". Run readers/python/__init__.py with a filename argument to get output in the DOM format. A transformer will be necessary to translate this into the standard docutils DOM. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1881 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 105 ++++++++++++++++++++++++++++- docutils/readers/python/moduleparser.py | 115 +++++++++++++++++++------------- docutils/readers/python/pynodes.py | 12 ++-- 3 files changed, 178 insertions(+), 54 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 3087f9147..0da4ba938 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -14,14 +14,113 @@ __docformat__ = 'reStructuredText' import sys import docutils.readers from docutils.readers.python import moduleparser - +from docutils import parsers +from docutils import nodes +from docutils.readers.python import pynodes +from docutils import readers class Reader(docutils.readers.Reader): config_section = 'python reader' config_section_dependencies = ('readers',) + default_parser = 'restructuredtext' + def parse(self): """Parse `self.input` into a document tree.""" - self.document = document = moduleparser.parse_module(self.input) - document.current_source = document.current_line = None + self.document = document = self.new_document() + module_section = moduleparser.parse_module(self.input, + self.source.source_path) + module_section.walk(DocformatVisitor(self.document)) + visitor = DocstringFormattingVisitor( + document=document, + default_parser=self.default_parser) + module_section.walk(visitor) + self.document.append(module_section) + +class DocformatVisitor(nodes.SparseNodeVisitor): + + """ + This sets docformat attributes in a module. Wherever an assignment + to __docformat__ is found, we look for the enclosing scope -- a class, + a module, or a function -- and set the docformat attribute there. + + We can't do this during the DocstringFormattingVisitor walking, + because __docformat__ may appear below a docstring in that format + (typically below the module docstring). + """ + + def visit_attribute(self, node): + assert isinstance(node[0], pynodes.object_name) + name = node[0][0].data + if name != '__docformat__': + return + value = None + for child in children: + if isinstance(child, pynodes.expression_value): + value = child[0].data + break + assert value.startswith("'") or value.startswith('"'), "__docformat__ must be assigned a string literal (not %s); line: %s" % (value, node['lineno']) + name = name[1:-1] + looking_in = node.parent + while not isinstance(looking_in, (pynodes.module_section, + pynodes.function_section, + pynodes.class_section)): + looking_in = looking_in.parent + looking_in['docformat'] = name + +class DocstringFormattingVisitor(nodes.SparseNodeVisitor): + + def __init__(self, document, default_parser): + self.document = document + self.default_parser = default_parser + self.parsers = {} + + def visit_docstring(self, node): + text = node[0].data + docformat = self.find_docformat(node) + del node[0] + node['docformat'] = docformat + parser = self.get_parser(docformat) + parser.parse(text, self.document) + for child in self.document.get_children(): + node.append(child) + self.document.current_source = self.document.current_line = None + del self.document[:] + + def get_parser(self, parser_name): + """ + Get a parser based on its name. We reuse parsers during this + visitation, so parser instances are cached. + """ + parser_name = parsers._parser_aliases.get(parser_name, parser_name) + if not self.parsers.has_key(parser_name): + cls = parsers.get_parser_class(parser_name) + self.parsers[parser_name] = cls() + return self.parsers[parser_name] + + def find_docformat(self, node): + """ + Find the __docformat__ closest to this node (i.e., look in the + class or module) + """ + while node: + if node.get('docformat'): + return node['docformat'] + node = node.parent + return self.default_parser + +if __name__ == '__main__': + import locale + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass + + from docutils.core import publish_cmdline, default_description + + description = ('Generates pseudo-XML from standalone reStructuredText ' + 'sources (for testing purposes). ' + default_description) + + publish_cmdline(description=description, + reader=Reader()) diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index c95d997c8..7f965e6e2 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -7,11 +7,10 @@ """ Parser for Python modules. -The `parse_module()` function takes a module's text and file name, runs it -through the module parser (using compiler.py and tokenize.py) and produces a -"module documentation tree": a high-level AST full of nodes that are -interesting from an auto-documentation standpoint. For example, given this -module (x.py):: +The `parse_module()` function takes a module's text and file name, +runs it through the module parser (using compiler.py and tokenize.py) +and produces a parse tree of the source code, using the nodes as found +in pynodes.py. For example, given this module (x.py):: # comment @@ -50,69 +49,95 @@ module (x.py):: The module parser will produce this module documentation tree:: - - - comment - + + Docstring - + Additional docstring - - + + + __docformat__ + 'reStructuredText' - - + + + a + 1 - + Attribute docstring - - + + + C + + Super + C's docstring - - + + + class_attribute + 1 - + class_attribute's docstring - - + + + __init__ + __init__'s docstring - - - - + + + + self + + + text + None - - + + + self.instance_attribute + (text * 7 + ' whaddyaknow') - + instance_attribute's docstring - - + + + f + f's docstring - - - + + + + x + # parameter x - - + + + y + a * 5 - + # parameter y - - + + + args + # parameter args - - + + + f.function_attribute + 1 - + f.function_attribute's docstring (Comments are not implemented yet.) compiler.parse() provides most of what's needed for this doctree, and -"tokenize" can be used to get the rest. We can determine the line number from -the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the -rest. +"tokenize" can be used to get the rest. We can determine the line +number from the compiler.parse() AST, and the TokenParser.rhs(lineno) +method provides the rest. The Docutils Python reader component will transform this module doctree into a Python-specific Docutils doctree, and then a `stylist transform`_ will diff --git a/docutils/readers/python/pynodes.py b/docutils/readers/python/pynodes.py index 40db6975e..61e21f10e 100644 --- a/docutils/readers/python/pynodes.py +++ b/docutils/readers/python/pynodes.py @@ -74,14 +74,14 @@ class attribute_tuple(PythonStructural, TextElement): pass # Collect all the classes we've written above -node_class_names = [] -def build_node_class_names(): +def install_node_class_names(): + node_class_names = [] for name, var in globals().items(): if (type(var) is types.ClassType and issubclass(var, PythonStructural) \ and name.lower() == name): node_class_names.append(var.tagname or name) - -# Register the new node names with GenericNodeVisitor and -# SpecificNodeVisitor: -nodes._add_node_class_names(node_class_names) + # Register the new node names with GenericNodeVisitor and + # SpecificNodeVisitor: + nodes._add_node_class_names(node_class_names) +install_node_class_names() -- cgit v1.2.1 From f877afc0916de9f9177df646b34f4d6157808693 Mon Sep 17 00:00:00 2001 From: orutherfurd Date: Sun, 28 Mar 2004 15:39:27 +0000 Subject: moved locale imports into try blocks, for Jython compatibility git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1896 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 0da4ba938..1bd9b151c 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -111,8 +111,8 @@ class DocstringFormattingVisitor(nodes.SparseNodeVisitor): return self.default_parser if __name__ == '__main__': - import locale try: + import locale locale.setlocale(locale.LC_ALL, '') except: pass -- cgit v1.2.1 From 00a18ecf4a86081753e27d394473a70dc287e9ed Mon Sep 17 00:00:00 2001 From: wiemann Date: Fri, 7 May 2004 12:07:30 +0000 Subject: Python 2.1 compatibility fix git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2037 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 7f965e6e2..ddfe21ea7 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -386,11 +386,11 @@ class FunctionVisitor(DocstringVisitor): special = [] argnames = list(node.argnames) if node.kwargs: - special.append(make_parameter(argnames[-1], excess_keyword=True)) + special.append(make_parameter(argnames[-1], excess_keyword=1)) argnames.pop() if node.varargs: special.append(make_parameter(argnames[-1], - excess_positional=True)) + excess_positional=1)) argnames.pop() defaults = list(node.defaults) defaults = [None] * (len(argnames) - len(defaults)) + defaults @@ -689,7 +689,11 @@ def make_attribute(name, lineno): n.append(make_object_name(name)) return n -def make_parameter(name, excess_keyword=False, excess_positional=False): +def make_parameter(name, excess_keyword=0, excess_positional=0): + """ + excess_keyword and excess_positional must be either 1 or 0, and + not both of them can be 1. + """ n = pynodes.parameter() n.append(make_object_name(name)) assert not excess_keyword or not excess_positional -- cgit v1.2.1 From 57a9c0577f41546be5adc5d0de742c0f4d487314 Mon Sep 17 00:00:00 2001 From: goodger Date: Sat, 8 May 2004 18:56:43 +0000 Subject: corrected frontend script description git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2049 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 1bd9b151c..14d01c0c8 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -38,6 +38,7 @@ class Reader(docutils.readers.Reader): module_section.walk(visitor) self.document.append(module_section) + class DocformatVisitor(nodes.SparseNodeVisitor): """ @@ -69,6 +70,7 @@ class DocformatVisitor(nodes.SparseNodeVisitor): looking_in = looking_in.parent looking_in['docformat'] = name + class DocstringFormattingVisitor(nodes.SparseNodeVisitor): def __init__(self, document, default_parser): @@ -109,7 +111,8 @@ class DocstringFormattingVisitor(nodes.SparseNodeVisitor): return node['docformat'] node = node.parent return self.default_parser - + + if __name__ == '__main__': try: import locale @@ -119,8 +122,8 @@ if __name__ == '__main__': from docutils.core import publish_cmdline, default_description - description = ('Generates pseudo-XML from standalone reStructuredText ' - 'sources (for testing purposes). ' + default_description) + description = ('Generates pseudo-XML from Python modules ' + '(for testing purposes). ' + default_description) publish_cmdline(description=description, reader=Reader()) -- cgit v1.2.1 From 2384d0cf6cd5f596511784ff8b54c8ae1e57e8fa Mon Sep 17 00:00:00 2001 From: cben Date: Sun, 25 Jul 2004 01:45:27 +0000 Subject: Allow the test suite to survive unimportable test modules. Notably, this fixes a crash on importing `moduleparser` under Python 2.1 from ``test/test_readers/test_python/test_functions.py``. (This shouldn't happen anyway, added to BUGS.txt) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2449 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 3 +++ docutils/readers/python/moduleparser.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 14d01c0c8..6c027641d 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -6,6 +6,9 @@ """ This package contains the Python Source Reader modules. + +It requires Python 2.2 or higher (`moduleparser` depends on `compiler` and +`tokenizer` modules). """ __docformat__ = 'reStructuredText' diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index ddfe21ea7..8fd7ed67b 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -5,7 +5,7 @@ # Copyright: This module has been placed in the public domain. """ -Parser for Python modules. +Parser for Python modules. Requires Python 2.2 or higher. The `parse_module()` function takes a module's text and file name, runs it through the module parser (using compiler.py and tokenize.py) -- cgit v1.2.1 From d1d7913a9f7f3a589852c652ad167c64b60fbf99 Mon Sep 17 00:00:00 2001 From: wiemann Date: Tue, 27 Jul 2004 22:59:40 +0000 Subject: typo git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@2487 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 6c027641d..22cfe3b8d 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -7,8 +7,8 @@ """ This package contains the Python Source Reader modules. -It requires Python 2.2 or higher (`moduleparser` depends on `compiler` and -`tokenizer` modules). +It requires Python 2.2 or higher (`moduleparser` depends on the +`compiler` and `tokenize` modules). """ __docformat__ = 'reStructuredText' -- cgit v1.2.1 From 43ca6a79125d28fce6c2ab2142f93733319382d4 Mon Sep 17 00:00:00 2001 From: wiemann Date: Mon, 14 Mar 2005 16:16:57 +0000 Subject: removed redundant get_children(); in case we want to change the behavior later, be can use __getattr__ or a descriptor; (the list is modified in place anyway, so there'd be not much to change about get_children) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3038 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py index 22cfe3b8d..ba147a3d5 100644 --- a/docutils/readers/python/__init__.py +++ b/docutils/readers/python/__init__.py @@ -88,7 +88,7 @@ class DocstringFormattingVisitor(nodes.SparseNodeVisitor): node['docformat'] = docformat parser = self.get_parser(docformat) parser.parse(text, self.document) - for child in self.document.get_children(): + for child in self.document.children: node.append(child) self.document.current_source = self.document.current_line = None del self.document[:] -- cgit v1.2.1 From c6a87ac3b3db32003899bd2a9d45f96c0fee2193 Mon Sep 17 00:00:00 2001 From: goodger Date: Thu, 5 Jan 2006 23:28:53 +0000 Subject: fixed markup bugs in docstrings; now works with Endo git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@4242 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/readers/python/moduleparser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'docutils/readers/python') diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py index 8fd7ed67b..03d57c948 100644 --- a/docutils/readers/python/moduleparser.py +++ b/docutils/readers/python/moduleparser.py @@ -140,7 +140,7 @@ number from the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the rest. The Docutils Python reader component will transform this module doctree into a -Python-specific Docutils doctree, and then a `stylist transform`_ will +Python-specific Docutils doctree, and then a "stylist transform" will further transform it into a generic doctree. Namespaces will have to be compiled for each of the scopes, but I'm not certain at what stage of processing. @@ -148,6 +148,8 @@ processing. It's very important to keep all docstring processing out of this, so that it's a completely generic and not tool-specific. +:: + > Why perform all of those transformations? Why not go from the AST to a > generic doctree? Or, even from the AST to the final output? @@ -176,7 +178,7 @@ from the Reader component itself. One stylist transform could produce HappyDoc-like output, another could produce output similar to module docs in the Python library reference manual, and so on. -It's for exactly this reason: +It's for exactly this reason:: >> It's very important to keep all docstring processing out of this, so that >> it's a completely generic and not tool-specific. -- cgit v1.2.1