diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2015-07-18 14:09:54 -0400 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2015-07-18 14:09:54 -0400 |
commit | aa9af88224fac4d25d5bf1d2f4757b8ffd2c22ee (patch) | |
tree | 3697e6162d46fdb6e17b2c6d694314dff19280ec /coverage | |
parent | ed2266434af1582cd94c1b89f7172bad62f88745 (diff) | |
download | python-coveragepy-git-aa9af88224fac4d25d5bf1d2f4757b8ffd2c22ee.tar.gz |
Refactor collector->data; data has only one of lines and arcs.
Now the collector communicates directly with the data, and control is less
involved. In the data, when measuring arcs, only arcs are stored. Lines
are calculated as needed. This saves space in the data file, and is faster.
Diffstat (limited to 'coverage')
-rw-r--r-- | coverage/collector.py | 52 | ||||
-rw-r--r-- | coverage/control.py | 10 | ||||
-rw-r--r-- | coverage/data.py | 63 |
3 files changed, 52 insertions, 73 deletions
diff --git a/coverage/collector.py b/coverage/collector.py index 57c35605..eec8703e 100644 --- a/coverage/collector.py +++ b/coverage/collector.py @@ -3,6 +3,8 @@ import os, sys from coverage import env +from coverage.backward import iitems +from coverage.files import abs_file from coverage.misc import CoverageException from coverage.pytracer import PyTracer @@ -20,7 +22,7 @@ except ImportError: # exception here causes all sorts of other noise in unittest. sys.stderr.write( "*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n" - ) + ) sys.exit(1) CTracer = None @@ -46,7 +48,8 @@ class Collector(object): # the top, and resumed when they become the top again. _collectors = [] - def __init__(self, + def __init__( + self, should_trace, check_include, timid, branch, warn, concurrency, ): """Create a collector. @@ -289,45 +292,20 @@ class Collector(object): else: self._start_tracer() - def get_line_data(self): - """Return the line data collected. + def save_data(self, covdata): + """Save the collected data to a `CoverageData`. - Data is { filename: { lineno: None, ...}, ...} + Also resets the collector. """ - if self.branch: - # If we were measuring branches, then we have to re-build the dict - # to show line data. We'll use the first lines of all the arcs, - # if they are actual lines. We don't need the second lines, because - # the second lines will also be first lines, sometimes to exits. - line_data = {} - for f, arcs in self.data.items(): - line_data[f] = dict( - (l1, None) for l1, _ in arcs.keys() if l1 > 0 - ) - return line_data - else: - return self.data - - def get_arc_data(self): - """Return the arc data collected. - - Data is { filename: { (l1, l2): None, ...}, ...} + def abs_file_dict(d): + """Return a dict like d, but with keys modified by `abs_file`.""" + return dict((abs_file(k), v) for k, v in iitems(d)) - Note that no data is collected or returned if the Collector wasn't - created with `branch` true. - - """ if self.branch: - return self.data + covdata.add_arcs(abs_file_dict(self.data)) else: - return {} - - def get_plugin_data(self): - """Return the mapping of source files to plugins. + covdata.add_lines(abs_file_dict(self.data)) + covdata.add_plugins(abs_file_dict(self.plugin_data)) - Returns: - dict: { filename: plugin_name, ... } - - """ - return self.plugin_data + self.reset() diff --git a/coverage/control.py b/coverage/control.py index 7c14e1b0..3f6f5aca 100644 --- a/coverage/control.py +++ b/coverage/control.py @@ -744,15 +744,7 @@ class Coverage(object): if not self._measured: return - def abs_file_dict(d): - """Return a dict like d, but with keys modified by `abs_file`.""" - return dict((abs_file(k), v) for k,v in iitems(d)) - - # TODO: seems like this parallel structure is getting kinda old... - self.data.add_lines(abs_file_dict(self.collector.get_line_data())) - self.data.add_arcs(abs_file_dict(self.collector.get_arc_data())) - self.data.add_plugins(abs_file_dict(self.collector.get_plugin_data())) - self.collector.reset() + self.collector.save_data(self.data) # If there are still entries in the source_pkgs list, then we never # encountered those packages. diff --git a/coverage/data.py b/coverage/data.py index db205811..adacaecc 100644 --- a/coverage/data.py +++ b/coverage/data.py @@ -8,7 +8,7 @@ import socket from coverage.backward import iitems, pickle from coverage.debug import _TEST_NAME_FILE from coverage.files import PathAliases -from coverage.misc import file_be_gone +from coverage.misc import CoverageException, file_be_gone class CoverageData(object): @@ -18,12 +18,12 @@ class CoverageData(object): * collector: a string identifying the collecting software - * lines: a dict mapping filenames to sorted lists of line numbers + * lines: a dict mapping filenames to lists of line numbers executed:: { 'file1': [17,23,45], 'file2': [1,2,3], ... } - * arcs: a dict mapping filenames to sorted lists of line number pairs:: + * arcs: a dict mapping filenames to lists of line number pairs:: { 'file1': [(17,23), (17,25), (25,26)], ... } @@ -31,6 +31,11 @@ class CoverageData(object): { 'file1': "django.coverage", ... } + Only one of `lines` or `arcs` will be present: with branch coverage, data + is stored as arcs. Without branch coverage, it is stored as lines. The + line data is easily recovered from the arcs: it is all the first elements + of the pairs that are greater than zero. + """ def __init__(self, collector=None, debug=None): @@ -82,7 +87,12 @@ class CoverageData(object): def lines(self, filename): """Get the list of lines executed for a file.""" - return list((self._lines.get(filename) or {}).keys()) + if self._arcs: + arcs = self._arcs.get(filename) or {} + return [s for s, __ in arcs if s > 0] + else: + lines = self._lines.get(filename) or {} + return list(lines) def arcs(self, filename): """Get the list of arcs executed for a file.""" @@ -107,30 +117,29 @@ class CoverageData(object): Should only be used on an empty CoverageData object. """ - try: - data = pickle.load(file_obj) - if isinstance(data, dict): - # Unpack the 'lines' item. - self._lines = dict([ - (f, dict.fromkeys(linenos, None)) - for f, linenos in iitems(data.get('lines', {})) - ]) - # Unpack the 'arcs' item. - self._arcs = dict([ - (f, dict.fromkeys(arcpairs, None)) - for f, arcpairs in iitems(data.get('arcs', {})) - ]) - self._plugins = data.get('plugins', {}) - except Exception: - # TODO: this used to handle file-doesnt-exist problems. Do we still need it? - pass + data = pickle.load(file_obj) + + # Unpack the 'lines' item. + self._lines = dict([ + (f, dict.fromkeys(linenos, None)) + for f, linenos in iitems(data.get('lines', {})) + ]) + # Unpack the 'arcs' item. + self._arcs = dict([ + (f, dict.fromkeys(arcpairs, None)) + for f, arcpairs in iitems(data.get('arcs', {})) + ]) + self._plugins = data.get('plugins', {}) def read_file(self, filename): """Read the coverage data from `filename`.""" if self._debug and self._debug.should('dataio'): self._debug.write("Reading data from %r" % (filename,)) - with open(filename, "rb") as f: - self.read(f) + try: + with open(filename, "rb") as f: + self.read(f) + except Exception as exc: + raise CoverageException("Couldn't read data from '%s': %s" % (filename, exc)) def write(self, file_obj): """Write the coverage data to `file_obj`.""" @@ -202,11 +211,11 @@ class CoverageData(object): def touch_file(self, filename): """Ensure that `filename` appears in the data, empty if needed.""" - self._lines.setdefault(filename, {}) + (self._arcs or self._lines).setdefault(filename, {}) def measured_files(self): """A list of all files that had been measured.""" - return list(self._lines.keys()) + return list(self._arcs or self._lines) def add_to_hash(self, filename, hasher): """Contribute `filename`'s data to the Md5Hash `hasher`.""" @@ -231,8 +240,8 @@ class CoverageData(object): filename_fn = lambda f: f else: filename_fn = os.path.basename - for filename, lines in iitems(self._lines): - summ[filename_fn(filename)] = len(lines) + for filename in self.measured_files(): + summ[filename_fn(filename)] = len(self.lines(filename)) return summ def __nonzero__(self): |