diff options
Diffstat (limited to 'sphinx/pycode/parser.py')
-rw-r--r-- | sphinx/pycode/parser.py | 471 |
1 files changed, 471 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
    sphinx.pycode.parser
    ~~~~~~~~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re
import ast
import sys
import inspect
import tokenize
import itertools
from token import NAME, NEWLINE, INDENT, DEDENT, NUMBER, OP, STRING
from tokenize import COMMENT, NL

try:
    from six import PY2, text_type
except ImportError:
    # Only these two constants are taken from six; derive them from the
    # running interpreter so the module still imports when six is missing.
    PY2 = sys.version_info[0] == 2
    text_type = type(u'')

if False:
    # For type annotation
    from typing import Any, Dict, IO, List, Tuple  # NOQA

# "#: text" comment line (group 1 is the text after the marker)
comment_re = re.compile(u'^\\s*#: ?(.*)\r?\n?$')
# whitespace only
indent_re = re.compile(u'^\\s*$')
# blank line, or a line holding nothing but a comment
emptyline_re = re.compile(u'^\\s*(#.*)?$')


def get_lvar_names(node, self=None):
    # type: (ast.AST, ast.expr) -> List[unicode]
    """Convert assignment-AST to variable names.

    :param node: an assignment target; the function also calls itself with
                 the plain attribute-name string of an ``Attribute`` target
    :param self: the first argument of the enclosing function, or ``None``.
                 When given, a bare name is accepted only if it equals that
                 argument's name, and ``<self>.<attr>`` targets yield
                 ``attr``.
    :returns: the list of variable names created by the target
    :raises NotImplementedError: for a node type this function does not know

    This raises `TypeError` if the assignment does not create new variable::

       ary[0] = 'foo'
       dic["bar"] = 'baz'
       # => TypeError
    """
    self_id = None
    if self:
        if PY2:
            self_id = self.id  # type: ignore
        else:
            self_id = self.arg

    node_name = node.__class__.__name__
    if node_name in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)
    elif node_name == 'Name':
        if self is None or node.id == self_id:  # type: ignore
            return [node.id]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name in ('Tuple', 'List'):
        members = []
        for elt in node.elts:  # type: ignore
            try:
                members.extend(get_lvar_names(elt, self))
            except TypeError:
                # this element does not define a variable; keep the others
                pass
        return members
    elif node_name == 'Attribute':
        if node.value.__class__.__name__ == 'Name' and self and node.value.id == self_id:  # type: ignore  # NOQA
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'str':
        # attribute name handed over by the 'Attribute' branch above
        return [node]  # type: ignore
    elif node_name == 'Starred':
        return get_lvar_names(node.value, self)  # type: ignore
    else:
        raise NotImplementedError('Unexpected node name %r' % node_name)


def dedent_docstring(s):
    # type: (unicode) -> unicode
    """Remove common leading indentation from docstring.

    The text is attached to a throwaway function so that `inspect.getdoc`
    performs the clean-up; leading and trailing CR/LF are stripped afterwards.
    """
    def dummy():
        # dummy function to mock `inspect.getdoc`.
        pass

    dummy.__doc__ = s  # type: ignore
    docstring = inspect.getdoc(dummy)
    return docstring.lstrip("\r\n").rstrip("\r\n")


def _char_offset(line, byte_offset):
    # type: (unicode, int) -> int
    """Convert a UTF-8 byte offset inside *line* into a character offset.

    AST nodes report ``col_offset`` in UTF-8 bytes, while the source lines
    handled here are text; the two differ as soon as a non-ASCII character
    precedes the node on its line.
    """
    if not isinstance(line, text_type):
        # byte strings are already indexed in bytes
        return byte_offset
    return len(line.encode('utf-8')[:byte_offset].decode('utf-8', 'ignore'))


def _string_literal_value(node):
    # type: (ast.AST) -> Any
    """Return the value of a string-literal node, or ``None`` for other nodes."""
    if sys.version_info >= (3, 8):
        # string literals are ast.Constant nodes here; ast.Str is deprecated
        if isinstance(node, ast.Constant) and isinstance(node.value, text_type):
            return node.value
        return None
    if isinstance(node, ast.Str):
        return node.s
    return None


class Token(object):
    """Better token wrapper for tokenize module.

    The constructor takes the five fields of a tokenize token in order.
    A token compares equal to:

    * an ``int``: when it equals the token kind
    * a ``str``: when it equals the token value
    * a list or tuple: when it equals ``[kind, value]``

    Comparing with ``None`` is always false; any other type raises
    `ValueError`.
    """

    def __init__(self, kind, value, start, end, source):
        # type: (int, Any, Tuple[int, int], Tuple[int, int], unicode) -> None  # NOQA
        self.kind = kind
        self.value = value
        self.start = start
        self.end = end
        self.source = source

    def __eq__(self, other):
        # type: (Any) -> bool
        if isinstance(other, int):
            return self.kind == other
        elif isinstance(other, str):
            return self.value == other
        elif isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        elif other is None:
            return False
        else:
            raise ValueError('Unknown value: %r' % other)

    def __ne__(self, other):
        # type: (Any) -> bool
        return not (self == other)

    def match(self, *conditions):
        # type: (Any) -> bool
        """Return True if the token equals any of the given conditions."""
        return any(self == candidate for candidate in conditions)

    def __repr__(self):
        # type: () -> str
        return '<Token kind=%r value=%r>' % (tokenize.tok_name[self.kind],
                                             self.value.strip())


class TokenProcessor(object):
    """Base class walking through the tokens of a list of source lines."""

    def __init__(self, buffers):
        # type: (List[unicode]) -> None
        lines = iter(buffers)
        self.buffers = buffers
        # the tokenizer pulls the lines one by one through this callable
        self.tokens = tokenize.generate_tokens(lambda: next(lines))  # type: ignore  # NOQA
        self.current = None     # type: Token
        self.previous = None    # type: Token

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line (*lineno* is 1-based)."""
        return self.buffers[lineno - 1]

    def fetch_token(self):
        # type: () -> Token
        """Fetch a next token from source code.

        Returns ``None`` if sequence finished.  The token fetched by the
        preceding call is kept in ``self.previous``.
        """
        try:
            self.previous = self.current
            self.current = Token(*next(self.tokens))
        except StopIteration:
            self.current = None

        return self.current

    def fetch_until(self, condition):
        # type: (Any) -> List[Token]
        """Fetch tokens until specified token appeared.

        The matching token is included in the returned list.

        .. note:: This also handles parenthesis well.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == condition:
                break
            elif self.current == [OP, '(']:
                # consume the bracketed part as a whole so that a match
                # inside the brackets does not end the search
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])

        return tokens


class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(AfterCommentParser, self).__init__(lines)
        self.comment = None  # type: unicode

    def fetch_rvalue(self):
        # type: () -> List[Token]
        """Fetch right-hand value of assignment.

        Stops at a ``;`` or at the first token that is not an operator,
        name, number or string; that token stays in ``self.current``.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self):
        # type: () -> None
        """Parse the code and obtain comment after assignment.

        ``self.comment`` is set to the raw comment token (including the
        ``#``) following the right-hand value, and stays ``None`` when there
        is no such comment or no ``=`` operator at all.
        """
        # skip lvalue (until '=' operator)
        while self.fetch_token() != [OP, '=']:
            if self.current is None:
                # End of input without an assignment operator.  Leave
                # explicitly: an ``assert`` here would be stripped under
                # ``python -O`` and this loop would then never terminate.
                return

        # skip rvalue
        self.fetch_rvalue()

        if self.current == COMMENT:
            self.comment = self.current.value


class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments.

    Results are collected in two attributes:

    * ``comments``: maps ``(context, name)`` to the comment text, where
      *context* is the dotted path of the enclosing definitions
    * ``deforders``: maps a dotted name to a counter value taken when the
      entry was (last) recorded
    """

    def __init__(self, buffers, encoding):
        # type: (List[unicode], unicode) -> None
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []               # type: List[unicode]
        self.current_classes = []       # type: List[unicode]
        self.current_function = None    # type: ast.FunctionDef
        self.comments = {}              # type: Dict[Tuple[unicode, unicode], unicode]
        self.previous = None            # type: ast.AST
        self.deforders = {}             # type: Dict[unicode, int]
        super(VariableCommentPicker, self).__init__()

    def add_entry(self, name):
        # type: (unicode) -> None
        """Record the definition order of *name* in the current context.

        Names defined inside a function are ignored, except inside the
        ``__init__`` method of a class where they are recorded as members
        of that class.
        """
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                definition = self.context[:-1] + [name]
            else:
                return
        else:
            definition = self.context + [name]

        self.deforders[".".join(definition)] = next(self.counter)

    def add_variable_comment(self, name, comment):
        # type: (unicode, unicode) -> None
        """Store *comment* for *name*, following the rules of `add_entry`."""
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                context = ".".join(self.context[:-1])
            else:
                return
        else:
            context = ".".join(self.context)

        self.comments[(context, name)] = comment

    def get_self(self):
        # type: () -> ast.expr
        """Returns the name of first argument if in function."""
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line (*lineno* is 1-based)."""
        return self.buffers[lineno - 1]

    def visit(self, node):
        # type: (ast.AST) -> None
        """Visit *node*, then update ``self.previous`` to that node."""
        super(VariableCommentPicker, self).visit(node)
        self.previous = node

    def visit_Assign(self, node):
        # type: (ast.Assign) -> None
        """Handles Assign node and pick up a variable comment.

        A ``#:`` comment after the assignment wins over ``#:`` comment lines
        directly above it.  The definition order is recorded in every case.
        """
        try:
            names_per_target = [get_lvar_names(t, self=self.get_self())
                                for t in node.targets]
            varnames = sum(names_per_target, [])
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # col_offset counts UTF-8 bytes, the line is indexed by characters
        offset = _char_offset(current_line, node.col_offset)

        # check comments after assignment
        parser = AfterCommentParser([current_line[offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        if indent_re.match(current_line[:offset]):
            comment_lines = []
            # walk upwards while the lines are "#:" comments
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_Expr(self, node):
        # type: (ast.Expr) -> None
        """Handles Expr node and pick up a comment if string.

        Only a string literal directly following an assignment is used; it
        documents the names of that assignment's first target.
        """
        if not isinstance(self.previous, ast.Assign):
            return

        docstring = _string_literal_value(node.value)
        if docstring is None:
            return

        try:
            varnames = get_lvar_names(self.previous.targets[0], self.get_self())
        except TypeError:
            return  # this assignment is not new definition!

        if not varnames:
            return

        if not isinstance(docstring, text_type):
            docstring = docstring.decode(self.encoding or 'utf-8')

        comment = dedent_docstring(docstring)
        for varname in varnames:
            self.add_variable_comment(varname, comment)
            self.add_entry(varname)

    def visit_ClassDef(self, node):
        # type: (ast.ClassDef) -> None
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        self.context.append(node.name)
        self.previous = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node):
        # type: (ast.FunctionDef) -> None
        """Handles FunctionDef node and set context.

        Functions nested in another function are skipped entirely, so they
        cannot reset the state kept for the enclosing function.
        """
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None


class DefinitionFinder(TokenProcessor):
    """Token-based parser locating class and function definitions.

    ``definitions`` maps the dotted name of each definition to a
    ``(type, start line, end line)`` tuple, *type* being ``'class'`` or
    ``'def'``.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(DefinitionFinder, self).__init__(lines)
        self.decorator = None   # type: Token
        self.context = []       # type: List[unicode]
        # stack of open blocks: (type, dotted name, start line), or
        # ('other', None, None) for blocks that are not definitions
        self.indents = []       # type: List
        self.definitions = {}   # type: Dict[unicode, Tuple[unicode, int, int]]

    def add_definition(self, name, entry):
        # type: (unicode, Tuple[unicode, int, int]) -> None
        """Store *entry* under *name* unless it is a function in a function."""
        if self.indents and self.indents[-1][0] == 'def' and entry[0] == 'def':
            # ignore definition of inner function
            pass
        else:
            self.definitions[name] = entry

    def parse(self):
        # type: () -> None
        """Walk through all tokens and fill ``self.definitions``."""
        while True:
            token = self.fetch_token()
            if token is None:
                break
            elif token == COMMENT:
                pass
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(NEWLINE, NL, INDENT, DEDENT)):
                # '@' at the beginning of a logical line: a decorator.
                # Only the first one of a stack is remembered.
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class']):
                self.parse_definition('class')
            elif token.match([NAME, 'def']):
                self.parse_definition('def')
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ):
        # type: (unicode) -> None
        """Parse the header of a definition; *typ* is 'class' or 'def'.

        The definition starts at its first decorator if there is one,
        otherwise at the line holding its name.
        """
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        if self.decorator:
            start_pos = self.decorator.start[0]
            self.decorator = None
        else:
            start_pos = name.start[0]

        self.fetch_until([OP, ':'])
        if self.fetch_token().match(COMMENT, NEWLINE):
            # the body is an indented block, closed later by finalize_block()
            self.fetch_until(INDENT)
            self.indents.append((typ, funcname, start_pos))
        else:
            # one-liner
            self.add_definition(funcname, (typ, start_pos, name.end[0]))
            self.context.pop()

    def finalize_block(self):
        # type: () -> None
        """Close the innermost block; record it if it is a definition."""
        definition = self.indents.pop()
        if definition[0] != 'other':
            typ, funcname, start_pos = definition
            end_pos = self.current.end[0] - 1
            # trailing blank and comment-only lines do not belong to the block
            while emptyline_re.match(self.get_line(end_pos)):
                end_pos -= 1

            self.add_definition(funcname, (typ, start_pos, end_pos))
            self.context.pop()


class Parser(object):
    """Python source code parser to pick up variable comments.

    This is a better wrapper for ``VariableCommentPicker``.
    """

    def __init__(self, code, encoding='utf-8'):
        # type: (unicode, unicode) -> None
        self.code = code
        self.encoding = encoding
        self.comments = {}          # type: Dict[Tuple[unicode, unicode], unicode]
        self.deforders = {}         # type: Dict[unicode, int]
        self.definitions = {}       # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self):
        # type: () -> None
        """Parse the code and pick up comments."""
        # the code is always handed to the parser as UTF-8 bytes;
        # ``self.encoding`` is only passed on to the picker
        tree = ast.parse(self.code.encode('utf-8'))
        picker = VariableCommentPicker(self.code.splitlines(True), self.encoding)
        picker.visit(tree)
        self.comments = picker.comments
        self.deforders = picker.deforders

    def parse_definition(self):
        # type: () -> None
        """Parse the location of definitions from the code."""
        parser = DefinitionFinder(self.code.splitlines(True))
        parser.parse()
        self.definitions = parser.definitions