Merged in twexler/coverage.py (pull request #58)

Don't use SourceForge anymore for Cobertura DTD
author: Ned Batchelder <nedbat@gmail.com> 2015-07-21 21:06:05 -0400
committer: Ned Batchelder <nedbat@gmail.com> 2015-07-21 21:06:05 -0400
commit: cf43af31d35ba527e778267c14e51c56c9c3a773 (patch)
tree: 75d7a4c751862f64f79474d41815ef11d224486a /coverage
parent: 130f0dcdff98a1f947784f6989d1984b73b28335 (diff)
parent: a591430903ed9108c8cb50369be0d9d9c1a0b200 (diff)
download: python-coveragepy-cf43af31d35ba527e778267c14e51c56c9c3a773.tar.gz
5 files changed, 202 insertions, 140 deletions
diff --git a/coverage/backunittest.py b/coverage/backunittest.py
index 95b6fcc..5aff043 100644
--- a/coverage/backunittest.py
+++ b/coverage/backunittest.py
@@ -22,10 +22,13 @@ class TestCase(unittest.TestCase):
     """
     # pylint: disable=missing-docstring
 
-    if not unittest_has('assertCountEqual'):
-        def assertCountEqual(self, s1, s2):
-            """Assert these have the same elements, regardless of order."""
-            self.assertEqual(set(s1), set(s2))
+    # Many Pythons have this method defined.  But PyPy3 has a bug with it
+    # somehow (https://bitbucket.org/pypy/pypy/issues/2092), so always use our
+    # own implementation that works everywhere, at least for the ways we're
+    # calling it.
+    def assertCountEqual(self, s1, s2):
+        """Assert these have the same elements, regardless of order."""
+        self.assertEqual(sorted(s1), sorted(s2))
 
     if not unittest_has('assertRaisesRegex'):
         def assertRaisesRegex(self, *args, **kwargs):
diff --git a/coverage/backward.py b/coverage/backward.py
index 58d9cfe..46c70fb 100644
--- a/coverage/backward.py
+++ b/coverage/backward.py
@@ -28,12 +28,6 @@ try:
 except NameError:
     unicode_class = str
 
-# Where do pickles come from?
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
 # range or xrange?
 try:
     range = xrange
diff --git a/coverage/collector.py b/coverage/collector.py
index eec8703..8ea0427 100644
--- a/coverage/collector.py
+++ b/coverage/collector.py
@@ -303,9 +303,9 @@ class Collector(object):
             return dict((abs_file(k), v) for k, v in iitems(d))
 
         if self.branch:
-            covdata.add_arcs(abs_file_dict(self.data))
+            covdata.set_arcs(abs_file_dict(self.data))
         else:
-            covdata.add_lines(abs_file_dict(self.data))
-        covdata.add_plugins(abs_file_dict(self.plugin_data))
+            covdata.set_lines(abs_file_dict(self.data))
+        covdata.set_plugins(abs_file_dict(self.plugin_data))
 
         self.reset()
diff --git a/coverage/control.py b/coverage/control.py
index 3f6f5ac..e1931a5 100644
--- a/coverage/control.py
+++ b/coverage/control.py
@@ -268,9 +268,7 @@ class Coverage(object):
         # Create the data file.  We do this at construction time so that the
         # data file will be written into the directory where the process
         # started rather than wherever the process eventually chdir'd to.
-        self.data = CoverageData(
-            collector="coverage v%s" % __version__,
-            )
+        self.data = CoverageData(debug=self.debug)
         self.data_files = CoverageDataFiles(basename=self.config.data_file)
 
         # The dirs for files considered "installed with the interpreter".
diff --git a/coverage/data.py b/coverage/data.py
index 9a8a397..68b0212 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -1,11 +1,12 @@
 """Coverage data for Coverage."""
 
 import glob
+import json
 import os
 import random
 import socket
 
-from coverage.backward import iitems, pickle
+from coverage.backward import iitems
 from coverage.debug import _TEST_NAME_FILE
 from coverage.files import PathAliases
 from coverage.misc import CoverageException, file_be_gone
@@ -14,76 +15,116 @@ from coverage.misc import CoverageException, file_be_gone
 class CoverageData(object):
     """Manages collected coverage data, including file storage.
 
-    The data file format is a pickled dict, with these keys:
+    This class is the public supported API to coverage.py's data.
 
-        * collector: a string identifying the collecting software
+    .. note::
 
-        * lines: a dict mapping filenames to lists of line numbers
-          executed::
+        The file format is not documented or guaranteed.  It will change in
+        the future, in possibly complicated ways.  Use this API to avoid
+        disruption.
 
-            { 'file1': [17,23,45], 'file2': [1,2,3], ... }
+    There are three kinds of data that can be collected:
 
-        * arcs: a dict mapping filenames to lists of line number pairs::
+    * **lines**: the line numbers of source lines that were executed.
+      These are always available.
 
-            { 'file1': [(17,23), (17,25), (25,26)], ... }
+    * **arcs**: pairs of source and destination line numbers for transitions
+      between source lines.  These are only available if branch coverage was
+      used.
 
-        * plugins: a dict mapping filenames to plugin names::
+    * **plugin names**: the module name of the plugin that handled each file
+      in the data.
 
-            { 'file1': "django.coverage", ... }
 
-    Only one of `lines` or `arcs` will be present: with branch coverage, data
-    is stored as arcs. Without branch coverage, it is stored as lines.  The
-    line data is easily recovered from the arcs: it is all the first elements
-    of the pairs that are greater than zero.
+    To read a coverage.py data file, use :meth:`read_file`, or :meth:`read` if
+    you have an already-opened file.  You can then access the line, arc, or
+    plugin data with :meth:`lines`, :meth:`arcs`, or :meth:`plugin_name`.
+
+    The :meth:`has_arcs` method indicates whether arc data is available.  You
+    can get a list of the files in the data with :meth:`measured_files`.
+    A summary of the line data is available from :meth:`line_counts`.  As with
+    most Python containers, you can determine if there is any data at all by
+    using this object as a boolean value.
+
+
+    Most data files will be created by coverage.py itself, but you can use
+    methods here to create data files if you like.  The :meth:`set_lines`,
+    :meth:`set_arcs`, and :meth:`set_plugins` methods add data, in ways that
+    are convenient for coverage.py.  To add a file without any measured data,
+    use :meth:`touch_file`.
+
+    You write to a named file with :meth:`write_file`, or to an already opened
+    file with :meth:`write`.
+
+    You can clear the data in memory with :meth:`erase`.  Two data collections
+    can be combined by using :meth:`update` on one `CoverageData`, passing it
+    the other.
 
     """
 
-    def __init__(self, collector=None, debug=None):
+    # The data file format is JSON, with these keys:
+    #
+    #     * lines: a dict mapping filenames to lists of line numbers
+    #       executed::
+    #
+    #         { 'file1': [17,23,45], 'file2': [1,2,3], ... }
+    #
+    #     * arcs: a dict mapping filenames to lists of line number pairs::
+    #
+    #         { 'file1': [[17,23], [17,25], [25,26]], ... }
+    #
+    #     * plugins: a dict mapping filenames to plugin names::
+    #
+    #         { 'file1': "django.coverage", ... }
+    #
+    # Only one of `lines` or `arcs` will be present: with branch coverage, data
+    # is stored as arcs. Without branch coverage, it is stored as lines.  The
+    # line data is easily recovered from the arcs: it is all the first elements
+    # of the pairs that are greater than zero.
+
+    def __init__(self, debug=None):
         """Create a CoverageData.
 
-        `collector` is a string describing the coverage measurement software.
-
         `debug` is a `DebugControl` object for writing debug messages.
 
         """
-        self._collector = collector
         self._debug = debug
 
         # A map from canonical Python source file name to a dictionary in
         # which there's an entry for each line number that has been
         # executed:
         #
-        #   {
-        #       'filename1.py': { 12: None, 47: None, ... },
-        #       ...
-        #       }
+        #   { 'filename1.py': [12, 47, 1001], ... }
         #
         self._lines = {}
 
         # A map from canonical Python source file name to a dictionary with an
         # entry for each pair of line numbers forming an arc:
         #
-        #   {
-        #       'filename1.py': { (12,14): None, (47,48): None, ... },
-        #       ...
-        #       }
+        #   { 'filename1.py': [(12,14), (47,48), ... ], ... }
         #
         self._arcs = {}
 
         # A map from canonical source file name to a plugin module name:
         #
-        #   {
-        #       'filename1.py': 'django.coverage',
-        #       ...
-        #       }
+        #   { 'filename1.py': 'django.coverage', ... }
         #
         self._plugins = {}
 
-    def erase(self):
-        """Erase the data in this object."""
-        self._lines = {}
-        self._arcs = {}
-        self._plugins = {}
+    ##
+    ## Reading data
+    ##
+
+    def has_arcs(self):
+        """Does this data have arcs?
+
+        Arc data is only available if branch coverage was used during
+        collection.
+
+        Returns a boolean.
+
+        """
+        return self._has_arcs()
 
     def lines(self, filename):
         """Get the list of lines executed for a file.
@@ -97,7 +138,7 @@ class CoverageData(object):
                 return [s for s, __ in self._arcs[filename] if s > 0]
         else:
             if filename in self._lines:
-                return list(self._lines[filename])
+                return self._lines[filename]
         return None
 
     def arcs(self, filename):
@@ -108,7 +149,7 @@ class CoverageData(object):
 
         """
         if filename in self._arcs:
-            return list((self._arcs[filename]).keys())
+            return self._arcs[filename]
         return None
 
     def plugin_name(self, filename):
@@ -130,32 +171,56 @@ class CoverageData(object):
             return self._plugins.get(filename, "")
         return None
 
+    def measured_files(self):
+        """A list of all files that had been measured."""
+        return list(self._arcs or self._lines)
+
+    def line_counts(self, fullpath=False):
+        """Return a dict summarizing the line coverage data.
+
+        Keys are based on the filenames, and values are the number of executed
+        lines.  If `fullpath` is true, then the keys are the full pathnames of
+        the files, otherwise they are the basenames of the files.
+
+        Returns:
+            dict mapping filenames to counts of lines.
+
+        """
+        summ = {}
+        if fullpath:
+            filename_fn = lambda f: f
+        else:
+            filename_fn = os.path.basename
+        for filename in self.measured_files():
+            summ[filename_fn(filename)] = len(self.lines(filename))
+        return summ
+
+    def __nonzero__(self):
+        return bool(self._lines) or bool(self._arcs)
+
+    __bool__ = __nonzero__
+
     def read(self, file_obj):
         """Read the coverage data from the given file object.
 
         Should only be used on an empty CoverageData object.
 
         """
-        data = pickle.load(file_obj)
-
-        # Unpack the 'lines' item.
-        self._lines = dict([
-            (f, dict.fromkeys(linenos, None))
-            for f, linenos in iitems(data.get('lines', {}))
-        ])
-        # Unpack the 'arcs' item.
-        self._arcs = dict([
-            (f, dict.fromkeys(arcpairs, None))
-            for f, arcpairs in iitems(data.get('arcs', {}))
-        ])
+        data = json.load(file_obj)
+
+        self._lines = data.get('lines', {})
+        self._arcs = dict(
+            (fname, [tuple(pair) for pair in arcs])
+            for fname, arcs in iitems(data.get('arcs', {}))
+        )
         self._plugins = data.get('plugins', {})
 
     def read_file(self, filename):
-        """Read the coverage data from `filename`."""
+        """Read the coverage data from `filename` into this object."""
         if self._debug and self._debug.should('dataio'):
             self._debug.write("Reading data from %r" % (filename,))
         try:
-            with open(filename, "rb") as f:
+            with open(filename, "r") as f:
                 self.read(f)
         except Exception as exc:
             raise CoverageException(
@@ -164,57 +229,43 @@ class CoverageData(object):
                 )
             )
 
-    def write(self, file_obj):
-        """Write the coverage data to `file_obj`."""
-
-        # Create the file data.
-        file_data = {}
-
-        if self._arcs:
-            file_data['arcs'] = dict((f, list(amap.keys())) for f, amap in iitems(self._arcs))
-        else:
-            file_data['lines'] = dict((f, list(lmap.keys())) for f, lmap in iitems(self._lines))
-
-        if self._collector:
-            file_data['collector'] = self._collector
+    ##
+    ## Writing data
+    ##
 
-        file_data['plugins'] = self._plugins
-
-        # Write the pickle to the file.
-        pickle.dump(file_data, file_obj, 2)
+    def set_lines(self, line_data):
+        """Add executed line data.
 
-    def write_file(self, filename):
-        """Write the coverage data to `filename`."""
-        if self._debug and self._debug.should('dataio'):
-            self._debug.write("Writing data to %r" % (filename,))
-        with open(filename, 'wb') as fdata:
-            self.write(fdata)
+        `line_data` is a dictionary mapping filenames to dictionaries::
 
-    def add_lines(self, line_data):
-        """Add executed line data.
+            { filename: { lineno: None, ... }, ...}
 
-        `line_data` is { filename: { lineno: None, ... }, ...}
+        Do not call this more than once: it will not update existing data, it
+        only sets data.
 
         """
-        if self.has_arcs():
+        if self._has_arcs():
             raise CoverageException("Can't add lines to existing arc data")
 
         for filename, linenos in iitems(line_data):
-            self._lines.setdefault(filename, {}).update(linenos)
+            self._lines[filename] = list(linenos)
 
-    def add_arcs(self, arc_data):
+    def set_arcs(self, arc_data):
         """Add measured arc data.
 
         `arc_data` is { filename: { (l1,l2): None, ... }, ...}
 
+        Do not call this more than once: it will not update existing data, it
+        only sets data.
+
         """
-        if self.has_lines():
+        if self._has_lines():
             raise CoverageException("Can't add arcs to existing line data")
 
         for filename, arcs in iitems(arc_data):
-            self._arcs.setdefault(filename, {}).update(arcs)
+            self._arcs[filename] = list(arcs)
 
-    def add_plugins(self, plugin_data):
+    def set_plugins(self, plugin_data):
         """Add per-file plugin information.
 
         `plugin_data` is { filename: plugin_name, ... }
@@ -235,6 +286,39 @@ class CoverageData(object):
                 )
             self._plugins[filename] = plugin_name
 
+    def touch_file(self, filename):
+        """Ensure that `filename` appears in the data, empty if needed."""
+        (self._arcs or self._lines).setdefault(filename, [])
+
+    def write(self, file_obj):
+        """Write the coverage data to `file_obj`."""
+
+        # Create the file data.
+        file_data = {}
+
+        if self._arcs:
+            file_data['arcs'] = self._arcs
+        else:
+            file_data['lines'] = self._lines
+
+        file_data['plugins'] = self._plugins
+
+        # Write the data to the file.
+        json.dump(file_data, file_obj)
+
+    def write_file(self, filename):
+        """Write the coverage data to `filename`."""
+        if self._debug and self._debug.should('dataio'):
+            self._debug.write("Writing data to %r" % (filename,))
+        with open(filename, 'w') as fdata:
+            self.write(fdata)
+
+    def erase(self):
+        """Erase the data in this object."""
+        self._lines = {}
+        self._arcs = {}
+        self._plugins = {}
+
     def update(self, other_data, aliases=None):
         """Update this data with data from another `CoverageData`.
 
@@ -242,9 +326,9 @@ class CoverageData(object):
         re-map paths to match the local machine's.
 
         """
-        if self.has_lines() and other_data.has_arcs():
+        if self._has_lines() and other_data._has_arcs():
             raise CoverageException("Can't combine arc data with line data")
-        if self.has_arcs() and other_data.has_lines():
+        if self._has_arcs() and other_data._has_lines():
             raise CoverageException("Can't combine line data with arc data")
 
         aliases = aliases or PathAliases()
@@ -266,22 +350,26 @@ class CoverageData(object):
                 )
 
         # _lines: merge dicts.
-        for filename, file_data in iitems(other_data._lines):
+        for filename, file_lines in iitems(other_data._lines):
             filename = aliases.map(filename)
-            self._lines.setdefault(filename, {}).update(file_data)
+            if filename in self._lines:
+                lines = set(self._lines[filename])
+                lines.update(file_lines)
+                file_lines = list(lines)
+            self._lines[filename] = file_lines
 
         # _arcs: merge dicts.
-        for filename, file_data in iitems(other_data._arcs):
+        for filename, file_arcs in iitems(other_data._arcs):
             filename = aliases.map(filename)
-            self._arcs.setdefault(filename, {}).update(file_data)
-
-    def touch_file(self, filename):
-        """Ensure that `filename` appears in the data, empty if needed."""
-        (self._arcs or self._lines).setdefault(filename, {})
+            if filename in self._arcs:
+                arcs = set(self._arcs[filename])
+                arcs.update(file_arcs)
+                file_arcs = list(arcs)
+            self._arcs[filename] = file_arcs
 
-    def measured_files(self):
-        """A list of all files that had been measured."""
-        return list(self._arcs or self._lines)
+    ##
+    ## Miscellaneous
+    ##
 
     def add_to_hash(self, filename, hasher):
         """Contribute `filename`'s data to the `hasher`.
@@ -298,37 +386,16 @@ class CoverageData(object):
             hasher.update(sorted(self.lines(filename)))
         hasher.update(self.plugin_name(filename))
 
-    def line_counts(self, fullpath=False):
-        """Return a dict summarizing the line coverage data.
-
-        Keys are based on the filenames, and values are the number of executed
-        lines.  If `fullpath` is true, then the keys are the full pathnames of
-        the files, otherwise they are the basenames of the files.
-
-        Returns:
-            dict mapping filenames to counts of lines.
+    ##
+    ## Internal
+    ##
 
-        """
-        summ = {}
-        if fullpath:
-            filename_fn = lambda f: f
-        else:
-            filename_fn = os.path.basename
-        for filename in self.measured_files():
-            summ[filename_fn(filename)] = len(self.lines(filename))
-        return summ
-
-    def __nonzero__(self):
-        return bool(self._lines) or bool(self._arcs)
-
-    __bool__ = __nonzero__
-
-    def has_lines(self):
-        """Does this data have lines?"""
+    def _has_lines(self):
+        """Do we have data in self._lines?"""
         return bool(self._lines)
 
-    def has_arcs(self):
-        """Does this data have arcs?"""
+    def _has_arcs(self):
+        """Do we have data in self._arcs?"""
         return bool(self._arcs)
author	Ned Batchelder <nedbat@gmail.com>	2015-07-21 21:06:05 -0400
committer	Ned Batchelder <nedbat@gmail.com>	2015-07-21 21:06:05 -0400
commit	cf43af31d35ba527e778267c14e51c56c9c3a773 (patch)
tree	75d7a4c751862f64f79474d41815ef11d224486a /coverage
parent	130f0dcdff98a1f947784f6989d1984b73b28335 (diff)
parent	a591430903ed9108c8cb50369be0d9d9c1a0b200 (diff)
download	python-coveragepy-cf43af31d35ba527e778267c14e51c56c9c3a773.tar.gz