summaryrefslogtreecommitdiff
path: root/sphinx/pycode/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/pycode/parser.py')
-rw-r--r--sphinx/pycode/parser.py471
1 files changed, 471 insertions, 0 deletions
diff --git a/sphinx/pycode/parser.py b/sphinx/pycode/parser.py
new file mode 100644
index 000000000..7460dcfce
--- /dev/null
+++ b/sphinx/pycode/parser.py
@@ -0,0 +1,471 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.pycode.parser
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Utilities parsing and analyzing Python code.
+
+ :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import re
+import ast
+import inspect
+import tokenize
+import itertools
+from token import NAME, NEWLINE, INDENT, DEDENT, NUMBER, OP, STRING
+from tokenize import COMMENT, NL
+
+from six import PY2, text_type
+
+if False:
+ # For type annotation
+ from typing import Any, Dict, IO, List, Tuple # NOQA
+
# Matches a line holding a "#:" documentation comment.  Group 1 captures the
# comment text; one optional space after "#:" and the line ending are left out.
comment_re = re.compile(u'^\\s*#: ?(.*)\r?\n?$')
# Matches text that consists of whitespace only (or is empty).
indent_re = re.compile(u'^\\s*$')
# Matches a line that is blank or contains nothing but a "#" comment.
emptyline_re = re.compile(u'^\\s*(#.*)?$')
+
+
def get_lvar_names(node, self=None):
    # type: (ast.AST, ast.expr) -> List[unicode]
    """Return the variable names created by an assignment target.

    :param node: the assignment target (an AST node, or a plain attribute
                 name string when called recursively)
    :param self: the first argument of the enclosing function, if any.  When
                 given, only names bound through that argument are accepted.
    :raises TypeError: if the target does not create a new variable::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError

    :raises NotImplementedError: if the kind of node is not known
    """
    if self:
        # the argument node exposes its name as ``id`` on py2 and ``arg`` on py3
        self_id = self.id if PY2 else self.arg  # type: ignore

    kind = type(node).__name__

    if kind == 'Name':
        if self is not None and node.id != self_id:  # type: ignore
            raise TypeError('The assignment %r is not instance variable' % node)
        return [node.id]  # type: ignore

    if kind == 'Attribute':
        owner = node.value  # type: ignore
        if type(owner).__name__ == 'Name' and self and owner.id == self_id:
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        raise TypeError('The assignment %r is not instance variable' % node)

    if kind in ('Tuple', 'List'):
        names = []
        for element in node.elts:  # type: ignore
            try:
                names += get_lvar_names(element, self)
            except TypeError:
                # this element does not define a variable; keep the others
                continue
        return names

    if kind == 'Starred':
        return get_lvar_names(node.value, self)  # type: ignore

    if kind == 'str':
        # bare attribute name handed over by the 'Attribute' case
        return [node]  # type: ignore

    if kind in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)

    raise NotImplementedError('Unexpected node name %r' % kind)
+
+
def dedent_docstring(s):
    # type: (unicode) -> unicode
    """Strip the indentation shared by the lines of docstring *s*.

    The text is attached to a throwaway function and read back through
    `inspect.getdoc`; leading and trailing CR/LF characters of the result
    are removed afterwards.
    """
    def carrier():
        # placeholder whose only job is to hold the docstring
        pass

    carrier.__doc__ = s  # type: ignore
    cleaned = inspect.getdoc(carrier)
    return cleaned.strip("\r\n")
+
+
class Token(object):
    """Better token wrapper for tokenize module.

    A token compares equal to its kind (an ``int``), to its text (a ``str``)
    or to a ``[kind, text]`` pair.
    """

    def __init__(self, kind, value, start, end, source):
        # type: (int, Any, Tuple[int, int], Tuple[int, int], unicode) -> None  # NOQA
        self.kind, self.value = kind, value
        self.start, self.end = start, end
        self.source = source

    def __eq__(self, other):
        # type: (Any) -> bool
        """Compare against a kind, a value or a (kind, value) sequence.

        ``None`` never matches; any other kind of operand raises `ValueError`.
        """
        if other is None:
            return False
        if isinstance(other, int):
            return self.kind == other
        if isinstance(other, str):
            return self.value == other
        if isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        raise ValueError('Unknown value: %r' % other)

    def __ne__(self, other):
        # type: (Any) -> bool
        return not (self == other)

    def match(self, *conditions):
        # type: (Any) -> bool
        """Return True if the token equals at least one of *conditions*."""
        for candidate in conditions:
            if self == candidate:
                return True
        return False

    def __repr__(self):
        # type: () -> str
        kind_name = tokenize.tok_name[self.kind]
        return '<Token kind=%r value=%r>' % (kind_name, self.value.strip())
+
+
class TokenProcessor(object):
    """Token stream over a list of source lines.

    ``current`` holds the token fetched last and ``previous`` the one fetched
    before it.
    """

    def __init__(self, buffers):
        # type: (List[unicode]) -> None
        self.buffers = buffers
        line_iter = iter(buffers)
        self.tokens = tokenize.generate_tokens(lambda: next(line_iter))  # type: ignore  # NOQA
        self.current = None  # type: Token
        self.previous = None  # type: Token

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line (*lineno* is 1-based)."""
        index = lineno - 1
        return self.buffers[index]

    def fetch_token(self):
        # type: () -> Token
        """Fetch a next token from source code.

        Returns ``None`` if sequence finished.
        """
        self.previous = self.current
        try:
            self.current = Token(*next(self.tokens))
        except StopIteration:
            self.current = None

        return self.current

    def fetch_until(self, condition):
        # type: (Any) -> List[Token]
        """Fetch tokens until specified token appeared.

        .. note:: This also handles parenthesis well.
        """
        closers = {'(': ')', '{': '}', '[': ']'}
        collected = []
        while self.fetch_token():
            token = self.current
            collected.append(token)
            if token == condition:
                break
            if token.kind == OP and token.value in closers:
                # consume the whole bracketed group before looking further
                collected += self.fetch_until([OP, closers[token.value]])

        return collected
+
+
class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(AfterCommentParser, self).__init__(lines)
        # text of the comment token following the assignment (None if absent)
        self.comment = None  # type: unicode

    def fetch_rvalue(self):
        # type: () -> List[Token]
        """Fetch right-hand value of assignment.

        Bracketed groups and indented blocks are consumed as a whole.  Reading
        stops after a ``;`` or after the first token that is not an operator,
        name, number or string; that token is left in ``self.current``.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self):
        # type: () -> None
        """Parse the code and obtain comment after assignment.

        :raises AssertionError: if the code contains no ``=`` operator
        """
        # skip lvalue (until '=' operator)
        while self.fetch_token() != [OP, '=']:
            if self.current is None:
                # Raised explicitly instead of using ``assert``: with ``-O``
                # the assert is stripped and this loop would spin forever on
                # an exhausted token stream.
                raise AssertionError('No assignment operator found')

        # skip rvalue
        self.fetch_rvalue()

        if self.current == COMMENT:
            self.comment = self.current.value
+
+
class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments.

    After visiting a tree, ``comments`` maps ``(context, name)`` to the
    comment text and ``deforders`` maps dotted names to their definition
    order.
    """

    def __init__(self, buffers, encoding):
        # type: (List[unicode], unicode) -> None
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []               # type: List[unicode]
        self.current_classes = []       # type: List[unicode]
        self.current_function = None    # type: ast.FunctionDef
        self.comments = {}              # type: Dict[Tuple[unicode, unicode], unicode]
        self.previous = None            # type: ast.AST
        self.deforders = {}             # type: Dict[unicode, int]
        super(VariableCommentPicker, self).__init__()

    def add_entry(self, name):
        # type: (unicode) -> None
        """Record the definition order of *name* in the current context.

        Names defined inside a function are ignored, except inside the
        ``__init__`` method of a class where they are recorded on the class.
        """
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                definition = self.context[:-1] + [name]
            else:
                return
        else:
            definition = self.context + [name]

        self.deforders[".".join(definition)] = next(self.counter)

    def add_variable_comment(self, name, comment):
        # type: (unicode, unicode) -> None
        """Store *comment* for *name* using the same scoping as `add_entry`."""
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                context = ".".join(self.context[:-1])
            else:
                return
        else:
            context = ".".join(self.context)

        self.comments[(context, name)] = comment

    def get_self(self):
        # type: () -> ast.expr
        """Returns the name of first argument if in function."""
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def visit(self, node):
        # type: (ast.AST) -> None
        """Visits the node, then updates self.previous to it."""
        super(VariableCommentPicker, self).visit(node)
        self.previous = node

    def visit_Assign(self, node):
        # type: (ast.Assign) -> None
        """Handles Assign node and pick up a variable comment."""
        try:
            varnames = sum([get_lvar_names(t, self=self.get_self()) for t in node.targets], [])
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # check comments after assignment
        parser = AfterCommentParser([current_line[node.col_offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        if indent_re.match(current_line[:node.col_offset]):
            comment_lines = []
            # walk upwards from the line just above the assignment
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_Expr(self, node):
        # type: (ast.Expr) -> None
        """Handles Expr node and pick up a comment if string."""
        if (isinstance(self.previous, ast.Assign) and isinstance(node.value, ast.Str)):
            try:
                varnames = get_lvar_names(self.previous.targets[0], self.get_self())
                for varname in varnames:
                    if isinstance(node.value.s, text_type):
                        docstring = node.value.s
                    else:
                        docstring = node.value.s.decode(self.encoding or 'utf-8')

                    self.add_variable_comment(varname, dedent_docstring(docstring))
                    self.add_entry(varname)
            except TypeError:
                pass  # this assignment is not new definition!

    def visit_ClassDef(self, node):
        # type: (ast.ClassDef) -> None
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        self.context.append(node.name)
        self.previous = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node):
        # type: (ast.FunctionDef) -> None
        """Handles FunctionDef node and set context.

        Functions nested inside another function are not visited.
        """
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None

    def visit_AsyncFunctionDef(self, node):
        # type: (ast.AST) -> None
        """Handles AsyncFunctionDef node exactly like FunctionDef.

        Without this handler the generic traversal would walk into the
        coroutine body and record its local assignments as if they belonged
        to the enclosing module or class.
        """
        self.visit_FunctionDef(node)  # type: ignore
+
+
class DefinitionFinder(TokenProcessor):
    """Token based scanner that locates class and function definitions.

    ``definitions`` maps the dotted name of each definition to a
    ``(type, start line, end line)`` tuple.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(DefinitionFinder, self).__init__(lines)
        self.decorator = None   # type: Token
        self.context = []       # type: List[unicode]
        self.indents = []       # type: List
        self.definitions = {}   # type: Dict[unicode, Tuple[unicode, int, int]]

    def add_definition(self, name, entry):
        # type: (unicode, Tuple[unicode, int, int]) -> None
        """Register *entry* under *name* unless it is an inner function."""
        enclosed_by_def = bool(self.indents) and self.indents[-1][0] == 'def'
        if not (enclosed_by_def and entry[0] == 'def'):
            self.definitions[name] = entry

    def parse(self):
        # type: () -> None
        """Walk through all tokens and collect the definitions."""
        line_starters = (NEWLINE, NL, INDENT, DEDENT)
        while self.fetch_token() is not None:
            token = self.current
            if token == COMMENT:
                continue
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(*line_starters)):
                # remember only the first decorator of a decorated definition
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class']):
                self.parse_definition('class')
            elif token.match([NAME, 'def']):
                self.parse_definition('def')
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ):
        # type: (unicode) -> None
        """Handle the header of a definition; *typ* is 'class' or 'def'."""
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        # a decorated definition starts at its first decorator
        if self.decorator:
            first_line = self.decorator.start[0]
            self.decorator = None
        else:
            first_line = name.start[0]

        self.fetch_until([OP, ':'])
        after_colon = self.fetch_token()
        if after_colon.match(COMMENT, NEWLINE):
            # the body follows as an indented block; closed by finalize_block()
            self.fetch_until(INDENT)
            self.indents.append((typ, funcname, first_line))
        else:
            # one-liner
            self.add_definition(funcname, (typ, first_line, name.end[0]))
            self.context.pop()

    def finalize_block(self):
        # type: () -> None
        """Close the innermost indented block on a DEDENT token."""
        block = self.indents.pop()
        if block[0] == 'other':
            return

        typ, funcname, first_line = block
        last_line = self.current.end[0] - 1
        # trailing blank and comment-only lines do not belong to the block
        while emptyline_re.match(self.get_line(last_line)):
            last_line -= 1

        self.add_definition(funcname, (typ, first_line, last_line))
        self.context.pop()
+
+
class Parser(object):
    """Python source code parser to pick up variable comments.

    This is a better wrapper for ``VariableCommentPicker``; it also collects
    the location of definitions through ``DefinitionFinder``.
    """

    def __init__(self, code, encoding='utf-8'):
        # type: (unicode, unicode) -> None
        self.code, self.encoding = code, encoding
        # results, filled in by parse()
        self.comments = {}      # type: Dict[Tuple[unicode, unicode], unicode]
        self.deforders = {}     # type: Dict[unicode, int]
        self.definitions = {}   # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code."""
        for step in (self.parse_comments, self.parse_definition):
            step()

    def parse_comments(self):
        # type: () -> None
        """Parse the code and pick up comments."""
        tree = ast.parse(self.code.encode('utf-8'))
        source_lines = self.code.splitlines(True)
        picker = VariableCommentPicker(source_lines, self.encoding)
        picker.visit(tree)
        self.comments, self.deforders = picker.comments, picker.deforders

    def parse_definition(self):
        # type: () -> None
        """Parse the location of definitions from the code."""
        finder = DefinitionFinder(self.code.splitlines(True))
        finder.parse()
        self.definitions = finder.definitions