From 34762d707e5fbe502b302027e4318c8301d7a34d Mon Sep 17 00:00:00 2001
From: goodger <goodger@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Thu, 19 Dec 2002 01:08:01 +0000
Subject: Added classes & methods.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@1032 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/readers/python/moduleparser.py | 464 +++++++++++++++++++-------------
 1 file changed, 277 insertions(+), 187 deletions(-)

(limited to 'docutils/readers/python/moduleparser.py')
diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py
index 8dcf432b2..dee7810ad 100644
--- a/docutils/readers/python/moduleparser.py
+++ b/docutils/readers/python/moduleparser.py
@@ -7,17 +7,11 @@
 """
 Parser for Python modules.
 
-Ideas:
-
-* Tokenize the module in parallel to extract initial values, comments, etc.
-
-* Merge the compiler & tokenize output such that the raw text hangs off of
-  nodes.  Useful for assignment expressions (RHS).
-
-What I'd like to do is to take a module, read in the text, run it through the
-module parser (using compiler.py and tokenize.py) and produce a high-level AST
-full of nodes that are interesting from an auto-documentation standpoint.  For
-example, given this module (x.py)::
+The `parse_module()` function takes a module's text and file name, runs it
+through the module parser (using compiler.py and tokenize.py) and produces a
+"module documentation tree": a high-level AST full of nodes that are
+interesting from an auto-documentation standpoint.  For example, given this
+module (x.py)::
 
     # comment
 
@@ -54,75 +48,77 @@ example, given this module (x.py)::
     f.function_attribute = 1
     '''f.function_attribute's docstring'''
 
-The module parser should produce a high-level AST, something like this::
+The module parser will produce this module documentation tree::
 
-    <Module filename="x.py">
+    <Module filename="test data">
         <Comment lineno=1>
             comment
-        <Docstring lineno=3>
+        <Docstring>
             Docstring
-        <Docstring lineno=...>           (I'll leave out the lineno's)
+        <Docstring lineno="5">
             Additional docstring
-        <Attribute name="__docformat__">
-            <Expression>
+        <Attribute lineno="7" name="__docformat__">
+            <Expression lineno="7">
                 'reStructuredText'
-        <Attribute name="a">
-            <Expression>
+        <Attribute lineno="9" name="a">
+            <Expression lineno="9">
                 1
-            <Docstring>
+            <Docstring lineno="10">
                 Attribute docstring
-        <Class name="C" inheritance="Super">
-            <Docstring>
+        <Class bases="Super" lineno="12" name="C">
+            <Docstring lineno="12">
                 C's docstring
-            <Attribute name="class_attribute">
-                <Expression>
+            <Attribute lineno="16" name="class_attribute">
+                <Expression lineno="16">
                     1
-                <Docstring>
+                <Docstring lineno="17">
                     class_attribute's docstring
-            <Method name="__init__">
-                <Parameters>
-                    <Parameter name="self">
-                    <Parameter name="text">
-                        <Expression>
-                            None
-                <Docstring>
+            <Method lineno="19" name="__init__">
+                <Docstring lineno="19">
                     __init__'s docstring
-                <Attribute name="instance_attribute" instance=True>
-                    <Expression>
-                        (text * 7
-                         + ' whaddyaknow')
-                    <Docstring>
-                        class_attribute's docstring
-        <Function name="f">
-            <Parameters>
-                <Parameter name="x">
+                <ParameterList lineno="19">
+                    <Parameter lineno="19" name="self">
+                    <Parameter lineno="19" name="text">
+                        <Default lineno="19">
+                            None
+                <Attribute lineno="22" name="self.instance_attribute">
+                    <Expression lineno="22">
+                        (text * 7 + ' whaddyaknow')
+                    <Docstring lineno="24">
+                        instance_attribute's docstring
+        <Function lineno="27" name="f">
+            <Docstring lineno="27">
+                f's docstring
+            <ParameterList lineno="27">
+                <Parameter lineno="27" name="x">
                     <Comment>
                         # parameter x
-                <Parameter name="y">
-                    <Expression>
-                        a*5
+                <Parameter lineno="27" name="y">
+                    <Default lineno="27">
+                        a * 5
                     <Comment>
                         # parameter y
-                <Parameter name="args" varargs=True>
+                <ExcessPositionalArguments lineno="27" name="args">
                     <Comment>
                         # parameter args
-            <Docstring>
-                f's docstring
-            <Attribute name="function_attribute">
-                <Expression>
-                    1
-                <Docstring>
-                    f.function_attribute's docstring
+        <Attribute lineno="33" name="f.function_attribute">
+            <Expression lineno="33">
+                1
+            <Docstring lineno="34">
+                f.function_attribute's docstring
+
+(Comments are not implemented yet.)
 
-compiler.parse() provides most of what's needed for this AST, and "tokenize"
-can be used to get the rest.  We can determine the line number from the
-compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
+compiler.parse() provides most of what's needed for this doctree, and
+"tokenize" can be used to get the rest.  We can determine the line number from
+the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the
 rest.
 
-The Docutils Python reader component will transform this AST into a
-Python-specific doctree, and then a `stylist transform`_ would further
-transform it into a generic doctree.  Namespaces will have to be compiled for
-each of the scopes, but I'm not certain at what stage of processing.
+The Docutils Python reader component will transform this module doctree into a
+Python-specific Docutils doctree, and then a `stylist transform`_ will
+further transform it into a generic doctree.  Namespaces will have to be
+compiled for each of the scopes, but I'm not certain at what stage of
+processing.
 
 It's very important to keep all docstring processing out of this, so that it's
 a completely generic and not tool-specific.
@@ -131,7 +127,7 @@ a completely generic and not tool-specific.
 > generic doctree?  Or, even from the AST to the final output?
 
 I want the docutils.readers.python.moduleparser.parse_module() function to
-produce a standard documentation-oriented AST that can be used by any tool.
+produce a standard documentation-oriented tree that can be used by any tool.
 We can develop it together without having to compromise on the rest of our
 design (i.e., HappyDoc doesn't have to be made to work like Docutils, and
 vice-versa).  It would be a higher-level version of what compiler.py provides.
@@ -171,6 +167,17 @@ Issues
   basic AST produced by the ASTVisitor walk, or generated by another tree
   traversal?
 
+* At what point should a distinction be made between local variables &
+  instance attributes in __init__ methods?
+
+* Docstrings are getting their lineno from their parents.  Should the
+  TokenParser find the real line numbers?
+
+* Comments: include them?  How and when?  Only full-line comments, or
+  parameter comments too?  (See function "f" above for an example.)
+
+* Module could use more docstrings & refactoring in places.
+
 """
 
 __docformat__ = 'reStructuredText'
@@ -186,6 +193,7 @@ from types import StringType, UnicodeType, TupleType
 
 
 def parse_module(module_text, filename):
+    """Return a module documentation tree from `module_text`."""
     ast = compiler.parse(module_text)
     token_parser = TokenParser(module_text)
     visitor = ModuleVisitor(filename, token_parser)
@@ -193,6 +201,161 @@ def parse_module(module_text, filename):
     return visitor.module
 
 
+class Node:
+
+    def __init__(self, node):
+        self.children = []
+        """List of child nodes."""
+
+        self.lineno = node.lineno
+        """Line number of this node (or ``None``)."""
+
+    def __str__(self, indent='    ', level=0):
+        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
+                       [child.__str__(indent, level+1)
+                        for child in self.children])
+
+    def __repr__(self):
+        parts = [self.__class__.__name__]
+        for name, value in self.attlist():
+            parts.append('%s="%s"' % (name, value))
+        return '<%s>' % ' '.join(parts)
+
+    def attlist(self, **atts):
+        if self.lineno is not None:
+            atts['lineno'] = self.lineno
+        attlist = atts.items()
+        attlist.sort()
+        return attlist
+
+    def append(self, node):
+        self.children.append(node)
+
+    def extend(self, node_list):
+        self.children.extend(node_list)
+
+
+class Module(Node):
+
+    def __init__(self, node, filename):
+        Node.__init__(self, node)
+        self.filename = filename
+
+    def attlist(self):
+        return Node.attlist(self, filename=self.filename)
+
+
+class Docstring(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = trim_docstring(text)
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        text = '\n'.join([prefix + line for line in self.text.splitlines()])
+        return Node.__str__(self, indent, level) + text + '\n'
+
+
+class Import(Node):
+
+    def __init__(self, node, names, from_name=None):
+        Node.__init__(self, node)
+        self.names = names
+        self.from_name = from_name
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        lines = []
+        for name, as in self.names:
+            if as:
+                lines.append('%s%s as %s' % (prefix, name, as))
+            else:
+                lines.append('%s%s' % (prefix, name))
+        text = '\n'.join(lines)
+        return Node.__str__(self, indent, level) + text + '\n'
+
+    def attlist(self):
+        if self.from_name:
+            atts = {'from': self.from_name}
+        else:
+            atts = {}
+        return Node.attlist(self, **atts)
+
+
+class Attribute(Node):
+
+    def __init__(self, node, name):
+        Node.__init__(self, node)
+        self.name = name
+
+    def attlist(self):
+        return Node.attlist(self, name=self.name)
+
+
+class AttributeTuple(Node):
+
+    def __init__(self, node, names):
+        Node.__init__(self, node)
+        self.names = names
+
+    def attlist(self):
+        return Node.attlist(self, names=' '.join(self.names))
+
+
+class Expression(Node):
+
+    def __init__(self, node, text):
+        Node.__init__(self, node)
+        self.text = text
+
+    def __str__(self, indent='    ', level=0):
+        prefix = indent * (level + 1)
+        return '%s%s%s\n' % (Node.__str__(self, indent, level),
+                             prefix, self.text.encode('unicode-escape'))
+
+
+class Function(Attribute): pass
+
+
+class ParameterList(Node): pass
+
+
+class Parameter(Attribute): pass
+
+
+class ParameterTuple(AttributeTuple):
+
+    def attlist(self):
+        return Node.attlist(self, names=normalize_parameter_name(self.names))
+
+
+class ExcessPositionalArguments(Parameter): pass
+
+
+class ExcessKeywordArguments(Parameter): pass
+
+
+class Default(Expression): pass
+
+
+class Class(Node):
+
+    def __init__(self, node, name, bases=None):
+        Node.__init__(self, node)
+        self.name = name
+        self.bases = bases or []
+
+    def attlist(self):
+        atts = {'name': self.name}
+        if self.bases:
+            atts['bases'] = ' '.join(self.bases)
+        return Node.attlist(self, **atts)
+
+
+class Method(Function): pass
+
+
 class BaseVisitor(ASTVisitor):
 
     def __init__(self, token_parser):
@@ -271,6 +434,11 @@ class ModuleVisitor(AssignmentVisitor):
         compiler.walk(node, visitor, walker=visitor)
         self.context[-1].append(visitor.function)
 
+    def visitClass(self, node):
+        visitor = ClassVisitor(self.token_parser)
+        compiler.walk(node, visitor, walker=visitor)
+        self.context[-1].append(visitor.klass)
+
 
 class AttributeVisitor(BaseVisitor):
 
@@ -313,13 +481,15 @@ class AttributeVisitor(BaseVisitor):
 class FunctionVisitor(DocstringVisitor):
 
     in_function = 0
+    function_class = Function
 
     def visitFunction(self, node):
         if self.in_function:
+            self.documentable = None
             # Don't bother with nested function definitions.
             return
         self.in_function = 1
-        self.function = function = Function(node, node.name)
+        self.function = function = self.function_class(node, node.name)
         if node.doc is not None:
             function.append(Docstring(node, node.doc))
         self.context.append(function)
@@ -360,142 +530,61 @@ class FunctionVisitor(DocstringVisitor):
             self.function.append(parameter_list)
 
 
-class Node:
-
-    def __init__(self, node):
-        self.children = []
-        """List of child nodes."""
-
-        self.lineno = node.lineno
-        """Line number of this node (or ``None``)."""
-
-    def __str__(self, indent='    ', level=0):
-        return ''.join(['%s%s\n' % (indent * level, repr(self))] +
-                       [child.__str__(indent, level+1)
-                        for child in self.children])
+class ClassVisitor(AssignmentVisitor):
 
-    def __repr__(self):
-        parts = [self.__class__.__name__]
-        for name, value in self.attlist():
-            parts.append('%s="%s"' % (name, value))
-        return '<%s>' % ' '.join(parts)
+    in_class = 0
 
-    def attlist(self, **atts):
-        if self.lineno is not None:
-            atts['lineno'] = self.lineno
-        attlist = atts.items()
-        attlist.sort()
-        return attlist
-
-    def append(self, node):
-        self.children.append(node)
-
-    def extend(self, node_list):
-        self.children.extend(node_list)
-
-
-class Module(Node):
-
-    def __init__(self, node, filename):
-        Node.__init__(self, node)
-        self.filename = filename
-
-    def attlist(self):
-        return Node.attlist(self, filename=self.filename)
-
-
-class Docstring(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = trim_docstring(text)
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        text = '\n'.join([prefix + line for line in self.text.splitlines()])
-        return Node.__str__(self, indent, level) + text + '\n'
-
-
-class Import(Node):
-
-    def __init__(self, node, names, from_name=None):
-        Node.__init__(self, node)
-        self.names = names
-        self.from_name = from_name
+    def __init__(self, token_parser):
+        AssignmentVisitor.__init__(self, token_parser)
+        self.bases = []
 
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        lines = []
-        for name, as in self.names:
-            if as:
-                lines.append('%s%s as %s' % (prefix, name, as))
-            else:
-                lines.append('%s%s' % (prefix, name))
-        text = '\n'.join(lines)
-        return Node.__str__(self, indent, level) + text + '\n'
+    def visitClass(self, node):
+        if self.in_class:
+            self.documentable = None
+            # Don't bother with nested class definitions.
+            return
+        self.in_class = 1
+        #import mypdb as pdb
+        #pdb.set_trace()
+        for base in node.bases:
+            self.visit(base)
+        self.klass = klass = Class(node, node.name, self.bases)
+        if node.doc is not None:
+            klass.append(Docstring(node, node.doc))
+        self.context.append(klass)
+        self.documentable = klass
+        self.visit(node.code)
+        self.context.pop()
 
-    def attlist(self):
-        if self.from_name:
-            atts = {'from': self.from_name}
+    def visitGetattr(self, node, suffix=None):
+        if suffix:
+            name = node.attrname + '.' + suffix
         else:
-            atts = {}
-        return Node.attlist(self, **atts)
-
-
-class Attribute(Node):
-
-    def __init__(self, node, name):
-        Node.__init__(self, node)
-        self.name = name
-
-    def attlist(self):
-        return Node.attlist(self, name=self.name)
-
-
-class AttributeTuple(Node):
-
-    def __init__(self, node, names):
-        Node.__init__(self, node)
-        self.names = names
+            name = node.attrname
+        self.default_visit(node, name)
 
-    def attlist(self):
-        return Node.attlist(self, names=' '.join(self.names))
-
-
-class Expression(Node):
-
-    def __init__(self, node, text):
-        Node.__init__(self, node)
-        self.text = text
-
-    def __str__(self, indent='    ', level=0):
-        prefix = indent * (level + 1)
-        return '%s%s%s\n' % (Node.__str__(self, indent, level),
-                             prefix, self.text.encode('unicode-escape'))
-
-
-class Function(Attribute): pass
-
-
-class ParameterList(Node): pass
-
-
-class Parameter(Attribute): pass
-
-
-class ParameterTuple(AttributeTuple):
-
-    def attlist(self):
-        return Node.attlist(self, names=normalize_parameter_name(self.names))
+    def visitName(self, node, suffix=None):
+        if suffix:
+            name = node.name + '.' + suffix
+        else:
+            name = node.name
+        self.bases.append(name)
 
+    def visitFunction(self, node):
+        if node.name == '__init__':
+            visitor = InitMethodVisitor(self.token_parser)
+        else:
+            visitor = MethodVisitor(self.token_parser)
+        compiler.walk(node, visitor, walker=visitor)
+        self.context[-1].append(visitor.function)
 
-class ExcessPositionalArguments(Parameter): pass
 
+class MethodVisitor(FunctionVisitor):
 
-class ExcessKeywordArguments(Parameter): pass
+    function_class = Method
 
 
-class Default(Expression): pass
+class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass
 
 
 class TokenParser:
@@ -645,7 +734,8 @@ class TokenParser:
                     self._backquote = 0
                     self.note_token()
                 else:                   # ignore these tokens:
-                    assert self.string in ('*', '**', '\n'), (
+                    assert (self.string in ('*', '**', '\n') 
+                            or self.type == tokenize.COMMENT), (
                         'token=%r' % (self.token,))
             else:
                 self.note_token()
@@ -659,7 +749,7 @@ def trim_docstring(text):
     See PEP 257.
     """
     if not text:
-        return ''
+        return text
     # Convert tabs to spaces (following the normal Python rules)
     # and split into a list of lines:
     lines = text.expandtabs().splitlines()
-- 
cgit v1.2.1