Refactor collector->data; data has only one of lines or arcs.

Now the collector communicates directly with the data, and control is less involved. In the data, when measuring arcs, only arcs are stored. Lines are calculated as needed. This saves space in the data file, and is faster.
author: Ned Batchelder <ned@nedbatchelder.com> 2015-07-18 14:09:54 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2015-07-18 14:09:54 -0400
commit: f749a4bd0a01b452e3378cf85c8e760be9bb1ade (patch)
tree: 6bc6929b8a9ecf913767131a80f5207b3ac61e51 /coverage/data.py
parent: efc976cfe783bff8af548e60c6146b489dde96e4 (diff)
download: python-coveragepy-f749a4bd0a01b452e3378cf85c8e760be9bb1ade.tar.gz
1 file changed, 36 insertions, 27 deletions
diff --git a/coverage/data.py b/coverage/data.py
index db20581..adacaec 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -8,7 +8,7 @@ import socket
 from coverage.backward import iitems, pickle
 from coverage.debug import _TEST_NAME_FILE
 from coverage.files import PathAliases
-from coverage.misc import file_be_gone
+from coverage.misc import CoverageException, file_be_gone
 
 
 class CoverageData(object):
@@ -18,12 +18,12 @@ class CoverageData(object):
 
         * collector: a string identifying the collecting software
 
-        * lines: a dict mapping filenames to sorted lists of line numbers
+        * lines: a dict mapping filenames to lists of line numbers
           executed::
 
             { 'file1': [17,23,45], 'file2': [1,2,3], ... }
 
-        * arcs: a dict mapping filenames to sorted lists of line number pairs::
+        * arcs: a dict mapping filenames to lists of line number pairs::
 
             { 'file1': [(17,23), (17,25), (25,26)], ... }
 
@@ -31,6 +31,11 @@ class CoverageData(object):
 
             { 'file1': "django.coverage", ... }
 
+    Only one of `lines` or `arcs` will be present: with branch coverage, data
+    is stored as arcs. Without branch coverage, it is stored as lines.  The
+    line data is easily recovered from the arcs: it is all the first elements
+    of the pairs that are greater than zero.
+
     """
 
     def __init__(self, collector=None, debug=None):
@@ -82,7 +87,12 @@ class CoverageData(object):
 
     def lines(self, filename):
         """Get the list of lines executed for a file."""
-        return list((self._lines.get(filename) or {}).keys())
+        if self._arcs:
+            arcs = self._arcs.get(filename) or {}
+            return [s for s, __ in arcs if s > 0]
+        else:
+            lines = self._lines.get(filename) or {}
+            return list(lines)
 
     def arcs(self, filename):
         """Get the list of arcs executed for a file."""
@@ -107,30 +117,29 @@ class CoverageData(object):
         Should only be used on an empty CoverageData object.
 
         """
-        try:
-            data = pickle.load(file_obj)
-            if isinstance(data, dict):
-                # Unpack the 'lines' item.
-                self._lines = dict([
-                    (f, dict.fromkeys(linenos, None))
-                    for f, linenos in iitems(data.get('lines', {}))
-                ])
-                # Unpack the 'arcs' item.
-                self._arcs = dict([
-                    (f, dict.fromkeys(arcpairs, None))
-                    for f, arcpairs in iitems(data.get('arcs', {}))
-                ])
-                self._plugins = data.get('plugins', {})
-        except Exception:
-            # TODO: this used to handle file-doesnt-exist problems.  Do we still need it?
-            pass
+        data = pickle.load(file_obj)
+
+        # Unpack the 'lines' item.
+        self._lines = dict([
+            (f, dict.fromkeys(linenos, None))
+            for f, linenos in iitems(data.get('lines', {}))
+        ])
+        # Unpack the 'arcs' item.
+        self._arcs = dict([
+            (f, dict.fromkeys(arcpairs, None))
+            for f, arcpairs in iitems(data.get('arcs', {}))
+        ])
+        self._plugins = data.get('plugins', {})
 
     def read_file(self, filename):
         """Read the coverage data from `filename`."""
         if self._debug and self._debug.should('dataio'):
             self._debug.write("Reading data from %r" % (filename,))
-        with open(filename, "rb") as f:
-            self.read(f)
+        try:
+            with open(filename, "rb") as f:
+                self.read(f)
+        except Exception as exc:
+            raise CoverageException("Couldn't read data from '%s': %s" % (filename, exc))
 
     def write(self, file_obj):
         """Write the coverage data to `file_obj`."""
@@ -202,11 +211,11 @@ class CoverageData(object):
 
     def touch_file(self, filename):
         """Ensure that `filename` appears in the data, empty if needed."""
-        self._lines.setdefault(filename, {})
+        (self._arcs or self._lines).setdefault(filename, {})
 
     def measured_files(self):
         """A list of all files that had been measured."""
-        return list(self._lines.keys())
+        return list(self._arcs or self._lines)
 
     def add_to_hash(self, filename, hasher):
         """Contribute `filename`'s data to the Md5Hash `hasher`."""
@@ -231,8 +240,8 @@ class CoverageData(object):
             filename_fn = lambda f: f
         else:
             filename_fn = os.path.basename
-        for filename, lines in iitems(self._lines):
-            summ[filename_fn(filename)] = len(lines)
+        for filename in self.measured_files():
+            summ[filename_fn(filename)] = len(self.lines(filename))
         return summ
 
     def __nonzero__(self):
author	Ned Batchelder <ned@nedbatchelder.com>	2015-07-18 14:09:54 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2015-07-18 14:09:54 -0400
commit	f749a4bd0a01b452e3378cf85c8e760be9bb1ade (patch)
tree	6bc6929b8a9ecf913767131a80f5207b3ac61e51 /coverage/data.py
parent	efc976cfe783bff8af548e60c6146b489dde96e4 (diff)
download	python-coveragepy-f749a4bd0a01b452e3378cf85c8e760be9bb1ade.tar.gz