sphinx/pycode/__init__.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

# -*- coding: utf-8 -*-
"""
    sphinx.pycode
    ~~~~~~~~~~~~~

    Utilities for parsing and analyzing Python code.

    :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
from __future__ import print_function

from six import iteritems, BytesIO, StringIO

from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser
from sphinx.util import get_module_source, detect_encoding

if False:
    # For type annotation
    from typing import Any, Dict, IO, List, Tuple  # NOQA


class ModuleAnalyzer(object):
    """Hold the source code of one module and analyze it on demand.

    Instances are normally created through the alternate constructors
    ``for_string``, ``for_file`` and ``for_module``.  The source text is read
    when the instance is created; parsing is deferred until ``parse()`` is
    called, either directly or through ``find_attr_docs()`` / ``find_tags()``.
    """

    # cache for analyzer objects -- caches both by module and file name
    cache = {}  # type: Dict[Tuple[unicode, unicode], Any]

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        """Create an analyzer from source code held in memory.

        A ``bytes`` value is wrapped in a binary stream and decoded by the
        constructor; anything else is treated as already decoded text.
        """
        if not isinstance(string, bytes):
            return cls(StringIO(string), modname, srcname, decoded=True)
        return cls(BytesIO(string), modname, srcname)

    @classmethod
    def for_file(cls, filename, modname):
        """Create (or fetch from the cache) an analyzer for *filename*.

        Any exception raised while opening the file or building the analyzer
        is re-raised as a ``PycodeError`` carrying the original exception.
        """
        key = ('file', filename)
        if key in cls.cache:
            return cls.cache[key]
        try:
            with open(filename, 'rb') as stream:
                analyzer = cls(stream, modname, filename)
                # stored while the file is still open, as part of the guarded
                # section
                cls.cache[key] = analyzer
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        return analyzer

    @classmethod
    def for_module(cls, modname):
        """Create (or fetch from the cache) an analyzer for module *modname*.

        A ``PycodeError`` raised while locating or loading the source is
        stored in the cache as well, so later calls for the same module
        re-raise it without repeating the lookup.
        """
        key = ('module', modname)
        if key in cls.cache:
            cached = cls.cache[key]
            if isinstance(cached, PycodeError):
                raise cached
            return cached

        try:
            kind, source = get_module_source(modname)
            # 'string' means ``source`` is the code itself; otherwise it is
            # handed to for_file() as a file name
            factory = cls.for_string if kind == 'string' else cls.for_file
            analyzer = factory(source, modname)
        except PycodeError as err:
            cls.cache[key] = err
            raise
        cls.cache[key] = analyzer
        return analyzer

    def __init__(self, source, modname, srcname, decoded=False):
        # type: (IO, unicode, unicode, bool) -> None
        """Read the whole of *source* and remember it for later parsing.

        :param source: file-like object positioned at the start of the code
        :param modname: name of the module
        :param srcname: name of the source file
        :param decoded: if true, *source* yields text and no encoding
                        detection or decoding is performed
        """
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        start = source.tell()
        if decoded:
            self.encoding = None
            self.code = source.read()
        else:
            # detect_encoding() consumes lines from the stream, so rewind to
            # the starting position before reading the full content
            self.encoding = detect_encoding(source.readline)
            source.seek(start)
            raw = source.read()
            self.code = raw.decode(self.encoding)

        # will be filled by parse()
        self.attr_docs = None   # type: Dict[Tuple[unicode, unicode], List[unicode]]
        self.tagorder = None    # type: Dict[unicode, int]
        self.tags = None        # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code and fill in the analysis results.

        Sets ``attr_docs``, ``tags`` and ``tagorder``.  Any failure is
        reported as a ``PycodeError`` naming the source file.
        """
        try:
            parser = Parser(self.code, self.encoding)
            parser.parse()

            docs = self.attr_docs = {}
            for scope, comment in iteritems(parser.comments):
                # every entry ends with an empty line; an empty comment
                # yields just that line
                lines = comment.splitlines() if comment else []
                docs[scope] = lines + ['']

            self.tags = parser.definitions
            self.tagorder = parser.deforders
        except Exception as exc:
            raise PycodeError('parsing %r failed: %r' % (self.srcname, exc))

    def find_attr_docs(self):
        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
        """Return the attribute documentation, parsing first if needed."""
        if self.attr_docs is None:
            self.parse()
        return self.attr_docs

    def find_tags(self):
        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
        """Return the definitions found in the code, parsing first if needed."""
        if self.tags is None:
            self.parse()
        return self.tags


if __name__ == '__main__':
    # Ad-hoc timing harness: analyze one module and report how long loading
    # the source, parsing it and retrieving the definitions each took.
    import time
    import pprint

    x0 = time.time()
    ma = ModuleAnalyzer.for_file('sphinx/environment.py',
                                 'sphinx.environment')
    # ModuleAnalyzer has no separate tokenizing step (no ``tokenize()``
    # method), so the first interval covers reading and decoding the file.
    x1 = time.time()
    ma.parse()
    x2 = time.time()
    pprint.pprint(ma.find_tags())
    x3 = time.time()
    print("loading %.4f, parsing %.4f, finding %.4f" % (x1 - x0, x2 - x1, x3 - x2))