author     Ned Batchelder <ned@nedbatchelder.com>    2015-07-11 17:18:36 -0400
committer  Ned Batchelder <ned@nedbatchelder.com>    2015-07-11 17:18:36 -0400
commit     891a22971d046a2f30b9546315440bb37a37a192
tree       7c8ad3a5b4778e84f4089b1c4b02f5cca106e1c8
parent     516829b783b225c822e60ad323f7a9767f339544
download   python-coveragepy-git-891a22971d046a2f30b9546315440bb37a37a192.tar.gz
Split off CoverageDataFiles from CoverageData
-rw-r--r--  coverage/control.py    17
-rw-r--r--  coverage/data.py      222
-rw-r--r--  tests/test_data.py     43
3 files changed, 159 insertions(+), 123 deletions(-)
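In short: all file I/O moves out of CoverageData, which now only holds measured data in memory, into the new CoverageDataFiles class, so callers pair the two objects. A minimal sketch of the resulting usage, distilled from the diff below (the data values are illustrative only):

    from coverage.data import CoverageData, CoverageDataFiles

    data_files = CoverageDataFiles()        # file storage, defaults to ".coverage"
    covdata = CoverageData()                # in-memory measurement data
    covdata.add_line_data({'a.py': {1: None, 2: None}})

    data_files.write(covdata, suffix='1')   # writes .coverage.1

    combined = CoverageData()
    data_files.combine_parallel_data(combined)  # merges .coverage.* into `combined`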
diff --git a/coverage/control.py b/coverage/control.py
index 56ce7c9f..43f07904 100644
--- a/coverage/control.py
+++ b/coverage/control.py
@@ -14,7 +14,7 @@ from coverage.annotate import AnnotateReporter
 from coverage.backward import string_class, iitems
 from coverage.collector import Collector
 from coverage.config import CoverageConfig
-from coverage.data import CoverageData
+from coverage.data import CoverageData, CoverageDataFiles
 from coverage.debug import DebugControl
 from coverage.files import TreeMatcher, FnmatchMatcher
 from coverage.files import PathAliases, find_python_files, prep_patterns
@@ -171,7 +171,7 @@ class Coverage(object):
         # Other instance attributes, set later.
         self.omit = self.include = self.source = None
         self.source_pkgs = None
-        self.data = self.collector = None
+        self.data = self.data_files = self.collector = None
         self.plugins = None
         self.pylib_dirs = self.cover_dirs = None
         self.data_suffix = self.run_suffix = None
@@ -271,8 +271,10 @@ class Coverage(object):
         # data file will be written into the directory where the process
         # started rather than wherever the process eventually chdir'd to.
         self.data = CoverageData(
-            basename=self.config.data_file,
             collector="coverage v%s" % __version__,
+        )
+        self.data_files = CoverageDataFiles(
+            basename=self.config.data_file,
             debug=self.debug,
         )
 
@@ -610,7 +612,7 @@ class Coverage(object):
         """Load previously-collected coverage data from the data file."""
         self._init()
         self.collector.reset()
-        self.data.read()
+        self.data_files.read(self.data)
 
     def start(self):
         """Start measuring code coverage.
@@ -657,6 +659,7 @@ class Coverage(object):
         self._init()
         self.collector.reset()
         self.data.erase()
+        self.data_files.erase()
 
     def clear_exclude(self, which='exclude'):
         """Clear the exclude list."""
@@ -725,7 +728,7 @@ class Coverage(object):
             )
 
         self._harvest_data()
-        self.data.write(suffix=data_suffix)
+        self.data_files.write(self.data, suffix=data_suffix)
 
     def combine(self, data_dirs=None):
         """Combine together a number of similarly-named coverage data files.
@@ -747,7 +750,7 @@ class Coverage(object):
                 result = paths[0]
                 for pattern in paths[1:]:
                     aliases.add(pattern, result)
-        self.data.combine_parallel_data(aliases=aliases, data_dirs=data_dirs)
+        self.data_files.combine_parallel_data(self.data, aliases=aliases, data_dirs=data_dirs)
 
     def _harvest_data(self):
         """Get the collected data and reset the collector.
@@ -1046,7 +1049,7 @@ class Coverage(object):
             ('plugins.file_tracers', ft_plugins),
             ('config_files', self.config.attempted_config_files),
             ('configs_read', self.config.config_files),
-            ('data_path', self.data.filename),
+            ('data_path', self.data_files.filename),
             ('python', sys.version.replace('\n', '')),
             ('platform', platform.platform()),
             ('implementation', implementation),
diff --git a/coverage/data.py b/coverage/data.py
index 69e928e2..6592dd6e 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -27,24 +27,16 @@ class CoverageData(object):
     """
 
-    def __init__(self, basename=None, collector=None, debug=None):
+    def __init__(self, collector=None, debug=None):
         """Create a CoverageData.
 
-        `basename` is the name of the file to use for storing data.
-
         `collector` is a string describing the coverage measurement software.
 
         `debug` is a `DebugControl` object for writing debug messages.
 
         """
         self.collector = collector or 'unknown'
         self.debug = debug
 
-        # Construct the filename that will be used for data file storage, if we
-        # ever do any file storage.
-        self.filename = basename or ".coverage"
-        self.filename = os.path.abspath(self.filename)
-
         # A map from canonical Python source file name to a dictionary in
         # which there's an entry for each line number that has been
         # executed:
@@ -74,28 +66,8 @@ class CoverageData(object):
         #     }
         self.plugins = {}
 
-    def read(self):
-        """Read coverage data from the coverage data file (if it exists)."""
-        self.lines, self.arcs, self.plugins = self._read_file(self.filename)
-
-    def write(self, suffix=None):
-        """Write the collected coverage data to a file.
-
-        `suffix` is a suffix to append to the base file name. This can be used
-        for multiple or parallel execution, so that many coverage data files
-        can exist simultaneously. A dot will be used to join the base name and
-        the suffix.
-
-        """
-        filename = self.filename
-        if suffix:
-            filename += "." + suffix
-        self.write_file(filename)
-
     def erase(self):
-        """Erase the data, both in this object, and from its file storage."""
-        if self.filename:
-            file_be_gone(self.filename)
+        """Erase the data in this object."""
         self.lines = {}
         self.arcs = {}
         self.plugins = {}
@@ -116,102 +88,66 @@ class CoverageData(object):
         """Return the map from filenames to plugin names."""
         return self.plugins
 
-    def write_file(self, filename):
-        """Write the coverage data to `filename`."""
-
-        # Create the file data.
-        data = {}
-
-        data['lines'] = self.line_data()
-        arcs = self.arc_data()
-        if arcs:
-            data['arcs'] = arcs
-
-        if self.collector:
-            data['collector'] = self.collector
-
-        data['plugins'] = self.plugins
-
-        if self.debug and self.debug.should('dataio'):
-            self.debug.write("Writing data to %r" % (filename,))
-
-        # Write the pickle to the file.
-        with open(filename, 'wb') as fdata:
-            pickle.dump(data, fdata, 2)
-
-    def read_file(self, filename):
-        """Read the coverage data from `filename`."""
-        self.lines, self.arcs, self.plugins = self._read_file(filename)
-
-    def _raw_data(self, filename):
-        """Return the raw pickled data from `filename`."""
-        if self.debug and self.debug.should('dataio'):
-            self.debug.write("Reading data from %r" % (filename,))
-        with open(filename, 'rb') as fdata:
-            data = pickle.load(fdata)
-        return data
-
-    def _read_file(self, filename):
+    def read(self, file_obj):
         """Return the stored coverage data from the given file.
 
         Returns three values, suitable for assigning to `self.lines`,
         `self.arcs`, and `self.plugins`.
 
         """
-        lines = {}
-        arcs = {}
-        plugins = {}
+        self.lines = {}
+        self.arcs = {}
+        self.plugins = {}
         try:
-            data = self._raw_data(filename)
+            data = pickle.load(file_obj)
             if isinstance(data, dict):
                 # Unpack the 'lines' item.
-                lines = dict([
+                self.lines = dict([
                     (f, dict.fromkeys(linenos, None))
                     for f, linenos in iitems(data.get('lines', {}))
                 ])
                 # Unpack the 'arcs' item.
-                arcs = dict([
+                self.arcs = dict([
                     (f, dict.fromkeys(arcpairs, None))
                     for f, arcpairs in iitems(data.get('arcs', {}))
                 ])
-                plugins = data.get('plugins', {})
+                self.plugins = data.get('plugins', {})
         except Exception:
+            # TODO: this used to handle file-doesnt-exist problems. Do we still need it?
             pass
-        return lines, arcs, plugins
 
-    def combine_parallel_data(self, aliases=None, data_dirs=None):
-        """Combine a number of data files together.
+    def read_file(self, filename):
+        """Read the coverage data from `filename`."""
+        if self.debug and self.debug.should('dataio'):
+            self.debug.write("Reading data from %r" % (filename,))
+        with open(filename, "rb") as f:
+            self.read(f)
 
-        Treat `self.filename` as a file prefix, and combine the data from all
-        of the data files starting with that prefix plus a dot.
+    def write(self, file_obj):
+        """Write the coverage data to `file_obj`."""
 
-        If `aliases` is provided, it's a `PathAliases` object that is used to
-        re-map paths to match the local machine's.
+        # Create the file data.
+        file_data = {}
 
-        If `data_dirs` is provided, then it combines the data files from each
-        directory into a single file.
+        file_data['lines'] = self.line_data()
+        arcs = self.arc_data()
+        if arcs:
+            file_data['arcs'] = arcs
 
-        """
-        aliases = aliases or PathAliases()
-        data_dir, local = os.path.split(self.filename)
-        localdot = local + '.*'
+        if self.collector:
+            file_data['collector'] = self.collector
 
-        data_dirs = data_dirs or [data_dir]
-        files_to_combine = []
-        for d in data_dirs:
-            pattern = os.path.join(os.path.abspath(d), localdot)
-            files_to_combine.extend(glob.glob(pattern))
+        file_data['plugins'] = self.plugins
 
-        for f in files_to_combine:
-            new_lines, new_arcs, new_plugins = self._read_file(f)
-            for filename, file_data in iitems(new_lines):
-                filename = aliases.map(filename)
-                self.lines.setdefault(filename, {}).update(file_data)
-            for filename, file_data in iitems(new_arcs):
-                filename = aliases.map(filename)
-                self.arcs.setdefault(filename, {}).update(file_data)
-            self.plugins.update(new_plugins)
-            os.remove(f)
+        # Write the pickle to the file.
+        pickle.dump(file_data, file_obj, 2)
+
+    def write_file(self, filename):
+        """Write the coverage data to `filename`."""
+        if self.debug and self.debug.should('dataio'):
+            self.debug.write("Writing data to %r" % (filename,))
+        with open(filename, 'wb') as fdata:
+            self.write(fdata)
 
     def add_line_data(self, line_data):
         """Add executed line data.
@@ -238,6 +174,21 @@ class CoverageData(object):
         """
         self.plugins.update(plugin_data)
 
+    def update(self, other_data, aliases=None):
+        """
+        If `aliases` is provided, it's a `PathAliases` object that is used to
+        re-map paths to match the local machine's.
+
+        """
+        aliases = aliases or PathAliases()
+        for filename, file_data in iitems(other_data.lines):
+            filename = aliases.map(filename)
+            self.lines.setdefault(filename, {}).update(file_data)
+        for filename, file_data in iitems(other_data.arcs):
+            filename = aliases.map(filename)
+            self.arcs.setdefault(filename, {}).update(file_data)
+        self.plugins.update(other_data.plugins)
+
     def touch_file(self, filename):
         """Ensure that `filename` appears in the data, empty if needed."""
         self.lines.setdefault(filename, {})
@@ -286,6 +237,75 @@ class CoverageData(object):
         return bool(self.arcs)
 
 
+class CoverageDataFiles(object):
+    """Manage the use of coverage data files."""
+
+    def __init__(self, basename=None, debug=None):
+        """
+        `basename` is the name of the file to use for storing data.
+
+        `debug` is a `DebugControl` object for writing debug messages.
+
+        """
+        # Construct the filename that will be used for data file storage, if we
+        # ever do any file storage.
+        self.filename = basename or ".coverage"
+        self.filename = os.path.abspath(self.filename)
+
+        self.debug = debug
+
+    def erase(self):
+        """Erase the data from the file storage."""
+        file_be_gone(self.filename)
+
+    def read(self, data):
+        """Read the coverage data."""
+        if os.path.exists(self.filename):
+            data.read_file(self.filename)
+
+    def write(self, data, suffix=None):
+        """Write the collected coverage data to a file.
+
+        `suffix` is a suffix to append to the base file name. This can be used
+        for multiple or parallel execution, so that many coverage data files
+        can exist simultaneously. A dot will be used to join the base name and
+        the suffix.
+
+        """
+        filename = self.filename
+        if suffix:
+            filename += "." + suffix
+        data.write_file(filename)
+
+    def combine_parallel_data(self, data, aliases=None, data_dirs=None):
+        """Combine a number of data files together.
+
+        Treat `self.filename` as a file prefix, and combine the data from all
+        of the data files starting with that prefix plus a dot.
+
+        If `aliases` is provided, it's a `PathAliases` object that is used to
+        re-map paths to match the local machine's.
+
+        If `data_dirs` is provided, then it combines the data files from each
+        directory into a single file.
+
+        """
+        data_dir, local = os.path.split(self.filename)
+        localdot = local + '.*'
+
+        data_dirs = data_dirs or [data_dir]
+        files_to_combine = []
+        for d in data_dirs:
+            pattern = os.path.join(os.path.abspath(d), localdot)
+            files_to_combine.extend(glob.glob(pattern))
+
+        for f in files_to_combine:
+            new_data = CoverageData()
+            new_data.read_file(f)
+            data.update(new_data, aliases=aliases)
+            os.remove(f)
+
+
 if __name__ == '__main__':
     # Ad-hoc: show the raw data in a data file.
     import pprint, sys
diff --git a/tests/test_data.py b/tests/test_data.py
index ff8255ba..c89bbea7 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -1,9 +1,10 @@
 """Tests for coverage.data"""
 
 import os
+import os.path
 
 from coverage.backward import pickle
-from coverage.data import CoverageData
+from coverage.data import CoverageData, CoverageDataFiles
 from coverage.files import PathAliases, canonical_filename
 
 from tests.coveragetest import CoverageTest
@@ -56,8 +57,12 @@ class DataTest(DataTestHelpers, CoverageTest):
     run_in_temp_dir = False
 
     def test_reading_empty(self):
+        # Make sure there is no .coverage data file here.
+        if os.path.exists(".coverage"):
+            os.remove(".coverage")
+        covdatafiles = CoverageDataFiles()
         covdata = CoverageData()
-        covdata.read()
+        covdatafiles.read(covdata)
         self.assert_summary(covdata, {})
 
     def test_adding_data(self):
@@ -73,44 +78,49 @@ class DataTest(DataTestHelpers, CoverageTest):
         self.assert_measured_files(covdata, MEASURED_FILES_1 + ['x.py'])
 
     def test_writing_and_reading(self):
+        covdatafiles = CoverageDataFiles()
         covdata1 = CoverageData()
         covdata1.add_line_data(DATA_1)
-        covdata1.write()
+        covdatafiles.write(covdata1)
 
         covdata2 = CoverageData()
-        covdata2.read()
+        covdatafiles.read(covdata2)
         self.assert_summary(covdata2, SUMMARY_1)
 
     def test_combining(self):
+        covdatafiles = CoverageDataFiles()
         covdata1 = CoverageData()
         covdata1.add_line_data(DATA_1)
-        covdata1.write(suffix='1')
+        covdatafiles.write(covdata1, suffix='1')
 
         covdata2 = CoverageData()
         covdata2.add_line_data(DATA_2)
-        covdata2.write(suffix='2')
+        covdatafiles.write(covdata2, suffix='2')
 
         covdata3 = CoverageData()
-        covdata3.combine_parallel_data()
+        covdatafiles.combine_parallel_data(covdata3)
         self.assert_summary(covdata3, SUMMARY_1_2)
         self.assert_measured_files(covdata3, MEASURED_FILES_1_2)
 
     def test_erasing(self):
+        covdatafiles = CoverageDataFiles()
         covdata1 = CoverageData()
         covdata1.add_line_data(DATA_1)
-        covdata1.write()
+        covdatafiles.write(covdata1)
 
         covdata1.erase()
         self.assert_summary(covdata1, {})
 
+        covdatafiles.erase()
         covdata2 = CoverageData()
-        covdata2.read()
+        covdatafiles.read(covdata2)
         self.assert_summary(covdata2, {})
 
     def test_file_format(self):
         # Write with CoverageData, then read the pickle explicitly.
+        covdatafiles = CoverageDataFiles()
         covdata = CoverageData()
         covdata.add_line_data(DATA_1)
-        covdata.write()
+        covdatafiles.write(covdata)
 
         with open(".coverage", 'rb') as fdata:
             data = pickle.load(fdata)
@@ -124,9 +134,10 @@ class DataTest(DataTestHelpers, CoverageTest):
 
     def test_file_format_with_arcs(self):
         # Write with CoverageData, then read the pickle explicitly.
+        covdatafiles = CoverageDataFiles()
         covdata = CoverageData()
         covdata.add_arc_data(ARC_DATA_3)
-        covdata.write()
+        covdatafiles.write(covdata)
 
         with open(".coverage", 'rb') as fdata:
             data = pickle.load(fdata)
@@ -137,25 +148,26 @@ class DataTest(DataTestHelpers, CoverageTest):
         self.assertCountEqual(arcs['y.py'], Y_PY_ARCS_3)
 
     def test_combining_with_aliases(self):
+        covdatafiles = CoverageDataFiles()
         covdata1 = CoverageData()
         covdata1.add_line_data({
             '/home/ned/proj/src/a.py': {1: None, 2: None},
             '/home/ned/proj/src/sub/b.py': {3: None},
         })
-        covdata1.write(suffix='1')
+        covdatafiles.write(covdata1, suffix='1')
 
         covdata2 = CoverageData()
         covdata2.add_line_data({
             r'c:\ned\test\a.py': {4: None, 5: None},
             r'c:\ned\test\sub\b.py': {6: None},
         })
-        covdata2.write(suffix='2')
+        covdatafiles.write(covdata2, suffix='2')
 
         covdata3 = CoverageData()
         aliases = PathAliases()
         aliases.add("/home/ned/proj/src/", "./")
         aliases.add(r"c:\ned\test", "./")
-        covdata3.combine_parallel_data(aliases=aliases)
+        covdatafiles.combine_parallel_data(covdata3, aliases=aliases)
 
         apy = canonical_filename('./a.py')
         sub_bpy = canonical_filename('./sub/b.py')
@@ -182,8 +194,9 @@ class DataTestInTempDir(DataTestHelpers, CoverageTest):
         os.makedirs('cov2')
         covdata2.write_file('cov2/.coverage.2')
 
+        covdatafiles = CoverageDataFiles()
         covdata3 = CoverageData()
-        covdata3.combine_parallel_data(data_dirs=[
+        covdatafiles.combine_parallel_data(covdata3, data_dirs=[
             'cov1/',
             'cov2/',
         ])
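Since CoverageData.read() and write() now take open file objects rather than building paths themselves, the pickled data can round-trip through any binary stream. A quick sketch of that property, using only the methods shown in this diff (the io.BytesIO buffer is my example, not part of the commit):

    import io

    from coverage.data import CoverageData

    covdata = CoverageData()
    covdata.add_line_data({'a.py': {1: None}})

    buf = io.BytesIO()          # any binary stream works, not just a file
    covdata.write(buf)          # pickle.dump of the lines/arcs/plugins dict

    buf.seek(0)
    restored = CoverageData()
    restored.read(buf)          # unpickles and repopulates the line data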