summaryrefslogtreecommitdiff
path: root/coverage/codeunit.py
blob: da617913acd17ad89aeea7efb1fbc675b5eb4e14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
"""Code unit (module) handling for Coverage."""

import os

from coverage.backward import open_python_source, string_class
from coverage.misc import CoverageException, NoSource
from coverage.parser import CodeParser, PythonParser
from coverage.phystokens import source_token_lines, source_encoding


def code_unit_factory(morfs, file_locator, get_plugin=None):
    """Build the list of reporting objects for `morfs`.

    `morfs` is either a single module or filename, or a list or tuple of
    them.  A single value is treated as a one-element list.

    `file_locator` is the FileLocator handed to every `PythonCodeUnit` made
    here.

    `get_plugin`, if given, is called with each string morf and returns the
    plugin responsible for that file, or None if no plugin claims it.  A
    claiming plugin is asked for the file reporter through its
    `file_reporter` method; everything else becomes a `PythonCodeUnit`.

    Raises `CoverageException` if a claiming plugin returns None as its file
    reporter.

    Returns a list with one entry per morf, in the order given.

    """
    # Normalize a lone morf into a list so there is one code path below.
    if not isinstance(morfs, (list, tuple)):
        morfs = [morfs]

    reporters = []
    for morf in morfs:
        # Only filenames (strings) are ever offered to plugins.
        if isinstance(morf, string_class) and get_plugin:
            plugin = get_plugin(morf)
        else:
            plugin = None

        if not plugin:
            reporters.append(PythonCodeUnit(morf, file_locator))
            continue

        reporter = plugin.file_reporter(morf)
        if reporter is None:
            raise CoverageException(
                "Plugin %r did not provide a file reporter for %r." % (
                    plugin.plugin_name, morf
                )
            )
        reporters.append(reporter)

    return reporters


class CodeUnit(object):
    """A unit of code to report on, made from a filename or a module.

    Instance attributes:

    `name` is a human-readable name for this code unit.
    `modname` is the module's `__name__`, or None if built from a filename.
    `filename` is the os path from which we can read the source.
    `relative` is a boolean.
    `file_locator` is the locator used to resolve filenames and zip data.

    """

    def __init__(self, morf, file_locator):
        self.file_locator = file_locator

        # A module carries its path in __file__; anything else is the path.
        raw_path = morf.__file__ if hasattr(morf, '__file__') else morf
        adjusted = self._adjust_filename(raw_path)
        self.filename = self.file_locator.canonical_filename(adjusted)

        if hasattr(morf, '__name__'):
            # Built from a module: the dotted name is the display name.
            self.modname = morf.__name__
            self.name = self.modname
            self.relative = True
        else:
            # Built from a path: display it extension-less and relative.
            stem = os.path.splitext(morf)[0]
            shown = self.file_locator.relative_filename(stem)
            # An absolute path only counts as relative if the locator
            # actually managed to shorten it.
            self.relative = (shown != stem) if os.path.isabs(stem) else True
            self.name = shown
            self.modname = None

        # Lazily filled by `source`.
        self._source = None

    def __repr__(self):
        details = (self.name, self.filename)
        return "<CodeUnit name=%r filename=%r>" % details

    def _adjust_filename(self, f):
        """Hook for subclasses to rewrite the filename; a no-op here."""
        # TODO: This shouldn't be in the base class, right?
        return f

    # Annoying comparison operators. Py3k wants __lt__ etc, and Py2k needs all
    # of them defined.  Code units are ordered and compared by `name` alone.

    def __lt__(self, other):
        return self.name < other.name

    def __le__(self, other):
        return self.name <= other.name

    def __eq__(self, other):
        return self.name == other.name

    def __ne__(self, other):
        return self.name != other.name

    def __gt__(self, other):
        return self.name > other.name

    def __ge__(self, other):
        return self.name >= other.name

    def flat_rootname(self):
        """Return a directory-free base name for files about this code unit.

        This lets reports for same-named files from different directories
        live side by side in a single directory.

        For example, the file a/b/c.py will return 'a_b_c'

        """
        if self.modname:
            return self.modname.replace('.', '_')

        # Drop any drive prefix, then flatten separators and dots.
        flat = os.path.splitdrive(self.name)[1]
        for separator in ('\\', '/', '.'):
            flat = flat.replace(separator, '_')
        return flat

    def source(self):
        """Return the source text, reading it via `get_source` only once."""
        text = self._source
        if text is None:
            text = self._source = self.get_source()
        return text

    def get_source(self):
        """Return the source code, as a string.

        The file on disk is preferred; otherwise the file locator is asked
        for zip data.  Raises `CoverageException` if neither has the source.

        """
        if not os.path.exists(self.filename):
            # Not a regular file on disk: maybe it's in a zip file?
            zipped = self.file_locator.get_zip_data(self.filename)
            if zipped is None:
                # Couldn't find source.
                raise CoverageException(
                    "No source for code '%s'." % self.filename
                    )
            return zipped

        # A regular text file: open it.
        with open_python_source(self.filename) as source_file:
            return source_file.read()

    def source_token_lines(self):
        """Yield the 'tokenized' text for the code, one list per line.

        The base class does no real tokenizing: each line is a single
        ('txt', line) token.

        """
        text = self.source()
        for text_line in text.splitlines():
            yield [('txt', text_line)]

    def should_be_python(self):
        """Does it seem like this file should contain Python?

        This is used to decide if a file reported as part of the execution of
        a program was really likely to have contained Python in the first
        place.  The base class always answers False.
        """
        return False

    def get_parser(self, exclude=None):
        """Return a parser for this code unit; subclasses must override."""
        raise NotImplementedError


class PythonCodeUnit(CodeUnit):
    """Represents a Python file."""

    def _adjust_filename(self, fname):
        """Map a compiled-file name to the name of its Python source.

        A name ending in .pyc or .pyo loses its last character, and a Jython
        `$py.class` suffix is replaced with `.py`.  Any other name is
        returned unchanged.

        """
        # .pyc files should always refer to a .py instead.
        if fname.endswith(('.pyc', '.pyo')):
            fname = fname[:-1]
        elif fname.endswith('$py.class'): # Jython
            fname = fname[:-9] + ".py"
        return fname

    def get_parser(self, exclude=None):
        """Return a `PythonParser` for this file.

        `exclude` is passed through to the parser unchanged.  Raises
        `NoSource` if the source cannot be located.

        """
        actual_filename, source = self._find_source(self.filename)
        return PythonParser(
            text=source, filename=actual_filename, exclude=exclude,
        )

    def _find_source(self, filename):
        """Find the source for `filename`.

        Returns two values: the actual filename, and the source.

        The source returned depends on which of these cases holds:

            * The filename seems to be a non-source file: returns None

            * The filename is a source file, and actually exists: returns None.

            * The filename is a source file, and is in a zip file or egg:
              returns the source, which may be an empty string if the file
              is empty.

            * The filename is a source file, but couldn't be found: raises
              `NoSource`.

        """
        source = None

        base, ext = os.path.splitext(filename)
        # For each source extension, the extensions to look for, in order.
        TRY_EXTS = {
            '.py':  ['.py', '.pyw'],
            '.pyw': ['.pyw'],
        }
        try_exts = TRY_EXTS.get(ext)
        if not try_exts:
            return filename, None

        for try_ext in try_exts:
            try_filename = base + try_ext
            if os.path.exists(try_filename):
                return try_filename, None
            source = self.file_locator.get_zip_data(try_filename)
            # Compare against None, as CodeUnit.get_source does: an empty
            # file in a zip is still a found file, not a missing one.
            if source is not None:
                return try_filename, source
        raise NoSource("No source for code: '%s'" % filename)

    def should_be_python(self):
        """Does it seem like this file should contain Python?

        This is used to decide if a file reported as part of the execution of
        a program was really likely to have contained Python in the first
        place.

        """
        # Get the file extension.
        _, ext = os.path.splitext(self.filename)

        # Anything named *.py* should be Python.
        if ext.startswith('.py'):
            return True
        # A file with no extension should be Python.
        if not ext:
            return True
        # Everything else is probably not Python.
        return False

    def source_token_lines(self):
        """Return the tokenized source, as produced by `source_token_lines`."""
        return source_token_lines(self.source())

    def source_encoding(self):
        """Return the encoding reported by `source_encoding` for the source."""
        return source_encoding(self.source())


class MakoParser(CodeParser):
    """A parser driven by Mako module source metadata.

    `metadata` is a mapping that must provide `line_map` (for
    `parse_source`) and `full_line_map` (for `translate_lines`).

    """

    def __init__(self, metadata):
        self.metadata = metadata

    def parse_source(self):
        """Returns executable_line_numbers, excluded_line_numbers

        The executable lines are the values of the metadata's `line_map`;
        nothing is ever excluded.

        """
        return set(self.metadata['line_map'].values()), set()

    def translate_lines(self, lines):
        """Map `lines` through the metadata's `full_line_map`.

        Returns the set of mapped line numbers.  Lines whose lookup raises
        IndexError are silently skipped.

        """
        translated = set()
        for lineno in lines:
            try:
                mapped = self.metadata['full_line_map'][lineno]
            except IndexError:
                # No entry for this line: nothing to translate it to.
                continue
            translated.add(mapped)
        return translated


class MakoCodeUnit(CodeUnit):
    """A code unit whose details come from Mako module source metadata.

    `metadata` is whatever `ModuleInfo.get_module_source_metadata` returns
    for the text of `filename`; the `filename` and `source_encoding` entries
    of it are used here.

    """

    def __init__(self, *args, **kwargs):
        super(MakoCodeUnit, self).__init__(*args, **kwargs)
        # Imported here so that mako is only needed when this class is used.
        from mako.template import ModuleInfo
        # Read inside a `with` so the file handle is closed promptly rather
        # than whenever the garbage collector gets to it.
        with open(self.filename) as f:
            py_source = f.read()
        self.metadata = ModuleInfo.get_module_source_metadata(py_source, full_line_map=True)

    def get_source(self):
        """Return the text of the file named by the metadata's `filename`."""
        # NOTE(review): presumably this is the original template file rather
        # than the generated module -- confirm against Mako's metadata.
        with open(self.metadata['filename']) as f:
            return f.read()

    def get_parser(self, exclude=None):
        """Return a `MakoParser` over the metadata; `exclude` is unused."""
        return MakoParser(self.metadata)

    def source_encoding(self):
        """Return the `source_encoding` entry of the metadata."""
        return self.metadata['source_encoding']


class DjangoCodeUnit(CodeUnit):
    """A code unit read straight from `filename` and parsed by `DjangoParser`."""

    def get_source(self):
        """Return the full text of `filename`."""
        with open(self.filename) as template_file:
            text = template_file.read()
        return text

    def get_parser(self, exclude=None):
        """Return a `DjangoParser` for `filename`; `exclude` is unused."""
        parser = DjangoParser(self.filename)
        return parser

    def source_encoding(self):
        """Return the encoding name, which is always "utf8"."""
        return "utf8"


class DjangoParser(CodeParser):
    """A parser that treats every line of `filename` as executable."""

    def __init__(self, filename):
        self.filename = filename

    def parse_source(self):
        """Returns executable_line_numbers, excluded_line_numbers

        Every line number of the file, counting from 1, is reported as
        executable; nothing is excluded.

        """
        with open(self.filename) as template_file:
            line_count = len(template_file.read().splitlines())
        every_line = set(range(1, line_count + 1))
        return every_line, set()