author     Ned Batchelder <ned@nedbatchelder.com>    2018-08-24 07:13:42 -0400
committer  Ned Batchelder <ned@nedbatchelder.com>    2018-08-24 07:13:42 -0400
commit     c4b2392dd51b7f976972afb00f01d4618c523cff (patch)
tree       7c77b420d4eec7ac628393663c67c0e9bc2c66f7 /coverage
parent     8a337f91e6444c027771741a56636a56389706e3 (diff)
parent     dd5b0cc88ebe4528abaa7cdf0b3fd516fb1f7e01 (diff)
download   python-coveragepy-git-c4b2392dd51b7f976972afb00f01d4618c523cff.tar.gz
Merge branch 'nedbat/data-sqlite'
Diffstat (limited to 'coverage')

-rw-r--r--   coverage/cmdline.py |   5
-rw-r--r--   coverage/control.py | 209
-rw-r--r--   coverage/data.py    | 323
-rw-r--r--   coverage/debug.py   |  20
-rw-r--r--   coverage/html.py    |   5
-rw-r--r--   coverage/misc.py    |  10
-rw-r--r--   coverage/results.py |   3
-rw-r--r--   coverage/sqldata.py | 435

8 files changed, 736 insertions, 274 deletions
diff --git a/coverage/cmdline.py b/coverage/cmdline.py
index 2b8e8fb9..23d2aec3 100644
--- a/coverage/cmdline.py
+++ b/coverage/cmdline.py
@@ -14,6 +14,7 @@ import traceback

 from coverage import env
 from coverage.collector import CTracer
+from coverage.data import line_counts
 from coverage.debug import info_formatter, info_header
 from coverage.execfile import run_python_file, run_python_module
 from coverage.misc import BaseCoverageException, ExceptionDuringRun, NoSource
@@ -657,10 +658,10 @@ class CoverageScript(object):
                 self.coverage.load()
                 data = self.coverage.get_data()
                 print(info_header("data"))
-                print("path: %s" % self.coverage._data_files.filename)
+                print("path: %s" % self.coverage.get_data().filename)
                 if data:
                     print("has_arcs: %r" % data.has_arcs())
-                    summary = data.line_counts(fullpath=True)
+                    summary = line_counts(data, fullpath=True)
                     filenames = sorted(summary.keys())
                     print("\n%d files:" % len(filenames))
                     for f in filenames:
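The cmdline.py hunks reflect `line_counts` moving from a method on the data object to a module-level function in `coverage.data`. A minimal sketch of calling the new helper, assuming a data file already exists in the current directory:

    from coverage import Coverage
    from coverage.data import line_counts

    cov = Coverage()
    cov.load()                          # read the data file
    data = cov.get_data()

    # line_counts() now takes the data object as its first argument.
    summary = line_counts(data, fullpath=True)
    for filename in sorted(summary):
        print("%5d %s" % (summary[filename], filename))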
diff --git a/coverage/control.py b/coverage/control.py
index 03238910..4dd62e10 100644
--- a/coverage/control.py
+++ b/coverage/control.py
@@ -15,7 +15,7 @@ from coverage.annotate import AnnotateReporter
 from coverage.backward import string_class, iitems
 from coverage.collector import Collector
 from coverage.config import read_coverage_config
-from coverage.data import CoverageData, CoverageDataFiles
+from coverage.data import CoverageData, combine_parallel_data
 from coverage.debug import DebugControl, write_formatted_info
 from coverage.disposition import disposition_debug_msg
 from coverage.files import PathAliases, set_relative_directory, abs_file
@@ -152,7 +152,7 @@ class Coverage(object):
         self._warnings = []

         # Other instance attributes, set later.
-        self._data = self._data_files = self._collector = None
+        self._data = self._collector = None
         self._plugins = None
         self._inorout = None
         self._inorout_class = InOrOut
@@ -163,8 +163,11 @@
         # State machine variables:
         # Have we initialized everything?
         self._inited = False
+        self._inited_for_start = False
         # Have we started collecting and not stopped it?
         self._started = False
+        # Have we written --debug output?
+        self._wrote_debug = False

         # If we have sub-process measurement happening automatically, then we
         # want any explicit creation of a Coverage object to mean, this process
@@ -214,74 +217,11 @@
             # this is a bit childish. :)
             plugin.configure([self, self.config][int(time.time()) % 2])

-        concurrency = self.config.concurrency or []
-        if "multiprocessing" in concurrency:
-            if not patch_multiprocessing:
-                raise CoverageException(            # pragma: only jython
-                    "multiprocessing is not supported on this Python"
-                )
-            patch_multiprocessing(rcfile=self.config.config_file)
-            # Multi-processing uses parallel for the subprocesses, so also use
-            # it for the main process.
-            self.config.parallel = True
-
-        self._collector = Collector(
-            should_trace=self._should_trace,
-            check_include=self._check_include_omit_etc,
-            timid=self.config.timid,
-            branch=self.config.branch,
-            warn=self._warn,
-            concurrency=concurrency,
-        )
-
-        # Early warning if we aren't going to be able to support plugins.
-        if self._plugins.file_tracers and not self._collector.supports_plugins:
-            self._warn(
-                "Plugin file tracers (%s) aren't supported with %s" % (
-                    ", ".join(
-                        plugin._coverage_plugin_name
-                        for plugin in self._plugins.file_tracers
-                    ),
-                    self._collector.tracer_name(),
-                )
-            )
-            for plugin in self._plugins.file_tracers:
-                plugin._coverage_enabled = False
-
-        # Create the file classifying substructure.
-        self._inorout = self._inorout_class(warn=self._warn)
-        self._inorout.configure(self.config)
-        self._inorout.plugins = self._plugins
-        self._inorout.disp_class = self._collector.file_disposition_class
-
-        # Suffixes are a bit tricky.  We want to use the data suffix only when
-        # collecting data, not when combining data.  So we save it as
-        # `self._run_suffix` now, and promote it to `self._data_suffix` if we
-        # find that we are collecting data later.
-        if self._data_suffix_specified or self.config.parallel:
-            if not isinstance(self._data_suffix_specified, string_class):
-                # if data_suffix=True, use .machinename.pid.random
-                self._data_suffix_specified = True
-        else:
-            self._data_suffix_specified = None
-        self._data_suffix = None
-        self._run_suffix = self._data_suffix_specified
-
-        # Create the data file.  We do this at construction time so that the
-        # data file will be written into the directory where the process
-        # started rather than wherever the process eventually chdir'd to.
-        self._data = CoverageData(debug=self._debug)
-        self._data_files = CoverageDataFiles(
-            basename=self.config.data_file, warn=self._warn, debug=self._debug,
-        )
-
-        # Set the reporting precision.
-        Numbers.set_precision(self.config.precision)
-
-        atexit.register(self._atexit)
-
-        # The user may want to debug things, show info if desired.
-        self._write_startup_debug()
+    def _post_init(self):
+        """Stuff to do after everything is initialized."""
+        if not self._wrote_debug:
+            self._wrote_debug = True
+            self._write_startup_debug()

     def _write_startup_debug(self):
         """Write out debug info at startup if needed."""
@@ -388,8 +328,78 @@
     def load(self):
         """Load previously-collected coverage data from the data file."""
         self._init()
-        self._collector.reset()
-        self._data_files.read(self._data)
+        if self._collector:
+            self._collector.reset()
+        self._init_data(suffix=None)
+        self._post_init()
+        self._data.read()
+
+    def _init_for_start(self):
+        """Initialization for start()"""
+        concurrency = self.config.concurrency or []
+        if "multiprocessing" in concurrency:
+            if not patch_multiprocessing:
+                raise CoverageException(            # pragma: only jython
+                    "multiprocessing is not supported on this Python"
+                )
+            patch_multiprocessing(rcfile=self.config.config_file)
+            # Multi-processing uses parallel for the subprocesses, so also use
+            # it for the main process.
+            self.config.parallel = True
+
+        self._collector = Collector(
+            should_trace=self._should_trace,
+            check_include=self._check_include_omit_etc,
+            timid=self.config.timid,
+            branch=self.config.branch,
+            warn=self._warn,
+            concurrency=concurrency,
+        )
+
+        suffix = self._data_suffix_specified
+        if suffix or self.config.parallel:
+            if not isinstance(suffix, string_class):
+                # if data_suffix=True, use .machinename.pid.random
+                suffix = True
+        else:
+            suffix = None
+
+        self._init_data(suffix)
+
+        # Early warning if we aren't going to be able to support plugins.
+        if self._plugins.file_tracers and not self._collector.supports_plugins:
+            self._warn(
+                "Plugin file tracers (%s) aren't supported with %s" % (
+                    ", ".join(
+                        plugin._coverage_plugin_name
+                        for plugin in self._plugins.file_tracers
+                    ),
+                    self._collector.tracer_name(),
+                )
+            )
+            for plugin in self._plugins.file_tracers:
+                plugin._coverage_enabled = False
+
+        # Create the file classifying substructure.
+        self._inorout = self._inorout_class(warn=self._warn)
+        self._inorout.configure(self.config)
+        self._inorout.plugins = self._plugins
+        self._inorout.disp_class = self._collector.file_disposition_class
+
+        atexit.register(self._atexit)
+
+    def _init_data(self, suffix):
+        """Create a data file if we don't have one yet."""
+        if self._data is None:
+            # Create the data file.  We do this at construction time so that the
+            # data file will be written into the directory where the process
+            # started rather than wherever the process eventually chdir'd to.
+            self._data = CoverageData(
+                basename=self.config.data_file,
+                suffix=suffix,
+                warn=self._warn,
+                debug=self._debug,
+            )

     def start(self):
         """Start measuring code coverage.
@@ -403,19 +413,22 @@
         """
         self._init()
-        self._inorout.warn_conflicting_settings()
+        if not self._inited_for_start:
+            self._inited_for_start = True
+            self._init_for_start()
+        self._post_init()

-        if self._run_suffix:
-            # Calling start() means we're running code, so use the run_suffix
-            # as the data_suffix when we eventually save the data.
-            self._data_suffix = self._run_suffix
-        if self._auto_load:
-            self.load()
+        # Issue warnings for possible problems.
+        self._inorout.warn_conflicting_settings()

-        # See if we think some code that would eventually be measured has already been imported.
+        # See if we think some code that would eventually be measured has
+        # already been imported.
         if self._warn_preimported_source:
             self._inorout.warn_already_imported_files()

+        if self._auto_load:
+            self.load()
+
         self._collector.start()
         self._started = True
@@ -442,9 +455,12 @@
         """
         self._init()
-        self._collector.reset()
-        self._data.erase()
-        self._data_files.erase(parallel=self.config.parallel)
+        self._post_init()
+        if self._collector:
+            self._collector.reset()
+        self._init_data(suffix=None)
+        self._data.erase(parallel=self.config.parallel)
+        self._data = None

     def clear_exclude(self, which='exclude'):
         """Clear the exclude list."""
@@ -495,9 +511,8 @@
     def save(self):
         """Save the collected coverage data to the data file."""
-        self._init()
         data = self.get_data()
-        self._data_files.write(data, suffix=self._data_suffix)
+        data.write()

     def combine(self, data_paths=None, strict=False):
         """Combine together a number of similarly-named coverage data files.
@@ -522,6 +537,8 @@
         """
         self._init()
+        self._init_data(suffix=None)
+        self._post_init()
         self.get_data()

         aliases = None
@@ -532,9 +549,7 @@
             for pattern in paths[1:]:
                 aliases.add(pattern, result)

-        self._data_files.combine_parallel_data(
-            self._data, aliases=aliases, data_paths=data_paths, strict=strict,
-        )
+        combine_parallel_data(self._data, aliases=aliases, data_paths=data_paths, strict=strict)

     def get_data(self):
         """Get the collected data.
@@ -547,8 +562,10 @@
         """
         self._init()
+        self._init_data(suffix=None)
+        self._post_init()

-        if self._collector.save_data(self._data):
+        if self._collector and self._collector.save_data(self._data):
            self._post_save_work()

         return self._data
@@ -599,7 +616,6 @@
         coverage data.

         """
-        self._init()
         analysis = self._analyze(morf)
         return (
             analysis.filename,
@@ -615,6 +631,11 @@
         Returns an `Analysis` object.

         """
+        # All reporting comes through here, so do reporting initialization.
+        self._init()
+        Numbers.set_precision(self.config.precision)
+        self._post_init()
+
         data = self.get_data()
         if not isinstance(it, FileReporter):
             it = self._get_file_reporter(it)
@@ -801,6 +822,7 @@
         import coverage as covmod

         self._init()
+        self._post_init()

         def plugin_info(plugins):
             """Make an entry for the sys_info from a list of plug-ins."""
@@ -815,13 +837,13 @@
         info = [
             ('version', covmod.__version__),
             ('coverage', covmod.__file__),
-            ('tracer', self._collector.tracer_name()),
+            ('tracer', self._collector.tracer_name() if self._collector else "-none-"),
             ('plugins.file_tracers', plugin_info(self._plugins.file_tracers)),
             ('plugins.configurers', plugin_info(self._plugins.configurers)),
             ('configs_attempted', self.config.attempted_config_files),
             ('configs_read', self.config.config_files_read),
             ('config_file', self.config.config_file),
-            ('data_path', self._data_files.filename),
+            ('data_path', self._data.filename if self._data else "-none-"),
             ('python', sys.version.replace('\n', '')),
             ('platform', platform.platform()),
             ('implementation', platform.python_implementation()),
@@ -836,7 +858,8 @@
             ('command_line', " ".join(getattr(sys, 'argv', ['???']))),
         ]

-        info.extend(self._inorout.sys_info())
+        if self._inorout:
+            info.extend(self._inorout.sys_info())

         return info
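Taken together, the control.py changes split the old monolithic `_init()` into lazy pieces: `_init_for_start()` builds the collector and picks the data suffix only when measurement actually begins, `_init_data()` creates the `CoverageData` object on first use, and `_post_init()` writes the startup `--debug` output exactly once. A sketch of the lifecycle this enables (the measured code is a stand-in):

    from coverage import Coverage

    cov = Coverage(data_suffix=True)    # no data file object is created yet

    cov.start()     # first start() runs _init_for_start(): the collector is
                    # built, and _init_data() creates the data object with a
                    # .machinename.pid.random suffix because data_suffix=True
    import json     # stand-in for the code being measured
    cov.stop()

    cov.save()      # save() is now just get_data() followed by data.write()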
diff --git a/coverage/data.py b/coverage/data.py
index 9f2d1308..f03e90ca 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -15,14 +15,24 @@ import socket

 from coverage import env
 from coverage.backward import iitems, string_class
-from coverage.debug import _TEST_NAME_FILE
 from coverage.files import PathAliases
 from coverage.misc import CoverageException, file_be_gone, isolate_module

 os = isolate_module(os)


-class CoverageData(object):
+def filename_suffix(suffix):
+    if suffix is True:
+        # If data_suffix was a simple true value, then make a suffix with
+        # plenty of distinguishing information.  We do this here in
+        # `save()` at the last minute so that the pid will be correct even
+        # if the process forks.
+        dice = random.Random(os.urandom(8)).randint(0, 999999)
+        suffix = "%s.%s.%06d" % (socket.gethostname(), os.getpid(), dice)
+    return suffix
+
+
+class CoverageJsonData(object):
     """Manages collected coverage data, including file storage.

     This class is the public supported API to the data coverage.py collects
@@ -57,8 +67,10 @@
     names in this API are case-sensitive, even on platforms with
     case-insensitive file systems.

-    To read a coverage.py data file, use :meth:`read_file`, or
-    :meth:`read_fileobj` if you have an already-opened file.  You can then
+    A data file is associated with the data when the :class:`CoverageData`
+    is created.
+
+    To read a coverage.py data file, use :meth:`read`.  You can then
     access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
     or :meth:`file_tracer`.  Run information is available with
     :meth:`run_infos`.
@@ -69,17 +81,15 @@
     most Python containers, you can determine if there is any data at all by
     using this object as a boolean value.

-    Most data files will be created by coverage.py itself, but you can use
     methods here to create data files if you like.  The :meth:`add_lines`,
     :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
     that are convenient for coverage.py.  The :meth:`add_run_info` method adds
     key-value pairs to the run information.

-    To add a file without any measured data, use :meth:`touch_file`.
+    To add a source file without any measured data, use :meth:`touch_file`.

-    You write to a named file with :meth:`write_file`, or to an already opened
-    file with :meth:`write_fileobj`.
+    Write the data to its file with :meth:`write`.

     You can clear the data in memory with :meth:`erase`.  Two data collections
     can be combined by using :meth:`update` on one :class:`CoverageData`,
@@ -112,13 +122,20 @@
     # line data is easily recovered from the arcs: it is all the first elements
     # of the pairs that are greater than zero.

-    def __init__(self, debug=None):
+    def __init__(self, basename=None, suffix=None, warn=None, debug=None):
         """Create a CoverageData.

+        `warn` is the warning function to use.
+
+        `basename` is the name of the file to use for storing data.
+
         `debug` is a `DebugControl` object for writing debug messages.

         """
+        self._warn = warn
         self._debug = debug
+        self.filename = os.path.abspath(basename or ".coverage")
+        self.suffix = suffix

         # A map from canonical Python source file name to a dictionary in
         # which there's an entry for each line number that has been
@@ -238,31 +255,21 @@
         """A list of all files that had been measured."""
         return list(self._arcs or self._lines or {})

-    def line_counts(self, fullpath=False):
-        """Return a dict summarizing the line coverage data.
-
-        Keys are based on the file names, and values are the number of executed
-        lines.  If `fullpath` is true, then the keys are the full pathnames of
-        the files, otherwise they are the basenames of the files.
-
-        Returns a dict mapping file names to counts of lines.
-
-        """
-        summ = {}
-        if fullpath:
-            filename_fn = lambda f: f
-        else:
-            filename_fn = os.path.basename
-        for filename in self.measured_files():
-            summ[filename_fn(filename)] = len(self.lines(filename))
-        return summ
-
     def __nonzero__(self):
         return bool(self._lines or self._arcs)

     __bool__ = __nonzero__

-    def read_fileobj(self, file_obj):
+    def read(self):
+        """Read the coverage data.
+
+        It is fine for the file to not exist, in which case no data is read.
+
+        """
+        if os.path.exists(self.filename):
+            self._read_file(self.filename)
+
+    def _read_fileobj(self, file_obj):
         """Read the coverage data from the given file object.

         Should only be used on an empty CoverageData object.
@@ -284,13 +291,13 @@

         self._validate()

-    def read_file(self, filename):
+    def _read_file(self, filename):
         """Read the coverage data from `filename` into this object."""
         if self._debug and self._debug.should('dataio'):
             self._debug.write("Reading data from %r" % (filename,))
         try:
             with self._open_for_reading(filename) as f:
-                self.read_fileobj(f)
+                self._read_fileobj(f)
         except Exception as exc:
             raise CoverageException(
                 "Couldn't read data from '%s': %s: %s" % (
@@ -438,7 +445,22 @@

         self._validate()

-    def write_fileobj(self, file_obj):
+    def write(self):
+        """Write the collected coverage data to a file.
+
+        `suffix` is a suffix to append to the base file name.  This can be used
+        for multiple or parallel execution, so that many coverage data files
+        can exist simultaneously.  A dot will be used to join the base name and
+        the suffix.
+
+        """
+        filename = self.filename
+        suffix = filename_suffix(self.suffix)
+        if suffix:
+            filename += "." + suffix
+        self._write_file(filename)
+
+    def _write_fileobj(self, file_obj):
         """Write the coverage data to `file_obj`."""

         # Create the file data.
@@ -460,21 +482,38 @@
         file_obj.write(self._GO_AWAY)
         json.dump(file_data, file_obj, separators=(',', ':'))

-    def write_file(self, filename):
+    def _write_file(self, filename):
         """Write the coverage data to `filename`."""
         if self._debug and self._debug.should('dataio'):
             self._debug.write("Writing data to %r" % (filename,))
         with open(filename, 'w') as fdata:
-            self.write_fileobj(fdata)
+            self._write_fileobj(fdata)

-    def erase(self):
-        """Erase the data in this object."""
+    def erase(self, parallel=False):
+        """Erase the data in this object.
+
+        If `parallel` is true, then also deletes data files created from the
+        basename by parallel-mode.
+
+        """
         self._lines = None
         self._arcs = None
         self._file_tracers = {}
         self._runs = []
         self._validate()

+        if self._debug and self._debug.should('dataio'):
+            self._debug.write("Erasing data file %r" % (self.filename,))
+        file_be_gone(self.filename)
+        if parallel:
+            data_dir, local = os.path.split(self.filename)
+            localdot = local + '.*'
+            pattern = os.path.join(os.path.abspath(data_dir), localdot)
+            for filename in glob.glob(pattern):
+                if self._debug and self._debug.should('dataio'):
+                    self._debug.write("Erasing parallel data file %r" % (filename,))
+                file_be_gone(filename)
+
     def update(self, other_data, aliases=None):
         """Update this data with data from another `CoverageData`.
@@ -582,20 +621,6 @@
             for key in val:
                 assert isinstance(key, string_class), "Key in _runs shouldn't be %r" % (key,)

-    def add_to_hash(self, filename, hasher):
-        """Contribute `filename`'s data to the `hasher`.
-
-        `hasher` is a `coverage.misc.Hasher` instance to be updated with
-        the file's data.  It should only get the results data, not the run
-        data.
-
-        """
-        if self._has_arcs():
-            hasher.update(sorted(self.arcs(filename) or []))
-        else:
-            hasher.update(sorted(self.lines(filename) or []))
-        hasher.update(self.file_tracer(filename))
-
     ##
     ## Internal
     ##
@@ -609,139 +634,111 @@
         return self._arcs is not None


-class CoverageDataFiles(object):
-    """Manage the use of coverage data files."""
+STORAGE = os.environ.get("COVERAGE_STORAGE", "sql")
+if STORAGE == "json":
+    CoverageData = CoverageJsonData
+elif STORAGE == "sql":
+    from coverage.sqldata import CoverageSqliteData
+    CoverageData = CoverageSqliteData

-    def __init__(self, basename=None, warn=None, debug=None):
-        """Create a CoverageDataFiles to manage data files.

-        `warn` is the warning function to use.
+def line_counts(data, fullpath=False):
+    """Return a dict summarizing the line coverage data.

-        `basename` is the name of the file to use for storing data.
+    Keys are based on the file names, and values are the number of executed
+    lines.  If `fullpath` is true, then the keys are the full pathnames of
+    the files, otherwise they are the basenames of the files.

-        `debug` is a `DebugControl` object for writing debug messages.
+    Returns a dict mapping file names to counts of lines.

-        """
-        self.warn = warn
-        self.debug = debug
+    """
+    summ = {}
+    if fullpath:
+        filename_fn = lambda f: f
+    else:
+        filename_fn = os.path.basename
+    for filename in data.measured_files():
+        summ[filename_fn(filename)] = len(data.lines(filename))
+    return summ

-        # Construct the file name that will be used for data storage.
-        self.filename = os.path.abspath(basename or ".coverage")

-    def erase(self, parallel=False):
-        """Erase the data from the file storage.
+def add_data_to_hash(data, filename, hasher):
+    """Contribute `filename`'s data to the `hasher`.

-        If `parallel` is true, then also deletes data files created from the
-        basename by parallel-mode.
+    `hasher` is a `coverage.misc.Hasher` instance to be updated with
+    the file's data.  It should only get the results data, not the run
+    data.

-        """
-        if self.debug and self.debug.should('dataio'):
-            self.debug.write("Erasing data file %r" % (self.filename,))
-        file_be_gone(self.filename)
-        if parallel:
-            data_dir, local = os.path.split(self.filename)
-            localdot = local + '.*'
-            pattern = os.path.join(os.path.abspath(data_dir), localdot)
-            for filename in glob.glob(pattern):
-                if self.debug and self.debug.should('dataio'):
-                    self.debug.write("Erasing parallel data file %r" % (filename,))
-                file_be_gone(filename)
-
-    def read(self, data):
-        """Read the coverage data."""
-        if os.path.exists(self.filename):
-            data.read_file(self.filename)
-
-    def write(self, data, suffix=None):
-        """Write the collected coverage data to a file.
-
-        `suffix` is a suffix to append to the base file name.  This can be used
-        for multiple or parallel execution, so that many coverage data files
-        can exist simultaneously.  A dot will be used to join the base name and
-        the suffix.
+    """
+    if data.has_arcs():
+        hasher.update(sorted(data.arcs(filename) or []))
+    else:
+        hasher.update(sorted(data.lines(filename) or []))
+    hasher.update(data.file_tracer(filename))

-        """
-        filename = self.filename
-        if suffix is True:
-            # If data_suffix was a simple true value, then make a suffix with
-            # plenty of distinguishing information.  We do this here in
-            # `save()` at the last minute so that the pid will be correct even
-            # if the process forks.
-            extra = ""
-            if _TEST_NAME_FILE:                             # pragma: debugging
-                with open(_TEST_NAME_FILE) as f:
-                    test_name = f.read()
-                extra = "." + test_name
-            dice = random.Random(os.urandom(8)).randint(0, 999999)
-            suffix = "%s%s.%s.%06d" % (socket.gethostname(), extra, os.getpid(), dice)
-        if suffix:
-            filename += "." + suffix
-        data.write_file(filename)

+def combine_parallel_data(data, aliases=None, data_paths=None, strict=False):
+    """Combine a number of data files together.

-    def combine_parallel_data(self, data, aliases=None, data_paths=None, strict=False):
-        """Combine a number of data files together.
+    Treat `data.filename` as a file prefix, and combine the data from all
+    of the data files starting with that prefix plus a dot.

-        Treat `self.filename` as a file prefix, and combine the data from all
-        of the data files starting with that prefix plus a dot.
+    If `aliases` is provided, it's a `PathAliases` object that is used to
+    re-map paths to match the local machine's.

-        If `aliases` is provided, it's a `PathAliases` object that is used to
-        re-map paths to match the local machine's.
+    If `data_paths` is provided, it is a list of directories or files to
+    combine.  Directories are searched for files that start with
+    `data.filename` plus dot as a prefix, and those files are combined.

-        If `data_paths` is provided, it is a list of directories or files to
-        combine.  Directories are searched for files that start with
-        `self.filename` plus dot as a prefix, and those files are combined.
+    If `data_paths` is not provided, then the directory portion of
+    `data.filename` is used as the directory to search for data files.

-        If `data_paths` is not provided, then the directory portion of
-        `self.filename` is used as the directory to search for data files.
+    Every data file found and combined is then deleted from disk.  If a file
+    cannot be read, a warning will be issued, and the file will not be
+    deleted.

-        Every data file found and combined is then deleted from disk.  If a file
-        cannot be read, a warning will be issued, and the file will not be
-        deleted.
+    If `strict` is true, and no files are found to combine, an error is
+    raised.

-        If `strict` is true, and no files are found to combine, an error is
-        raised.
+    """
+    # Because of the os.path.abspath in the constructor, data_dir will
+    # never be an empty string.
+    data_dir, local = os.path.split(data.filename)
+    localdot = local + '.*'
+
+    data_paths = data_paths or [data_dir]
+    files_to_combine = []
+    for p in data_paths:
+        if os.path.isfile(p):
+            files_to_combine.append(os.path.abspath(p))
+        elif os.path.isdir(p):
+            pattern = os.path.join(os.path.abspath(p), localdot)
+            files_to_combine.extend(glob.glob(pattern))
+        else:
+            raise CoverageException("Couldn't combine from non-existent path '%s'" % (p,))

-        """
-        # Because of the os.path.abspath in the constructor, data_dir will
-        # never be an empty string.
-        data_dir, local = os.path.split(self.filename)
-        localdot = local + '.*'
-
-        data_paths = data_paths or [data_dir]
-        files_to_combine = []
-        for p in data_paths:
-            if os.path.isfile(p):
-                files_to_combine.append(os.path.abspath(p))
-            elif os.path.isdir(p):
-                pattern = os.path.join(os.path.abspath(p), localdot)
-                files_to_combine.extend(glob.glob(pattern))
-            else:
-                raise CoverageException("Couldn't combine from non-existent path '%s'" % (p,))
-
-        if strict and not files_to_combine:
-            raise CoverageException("No data to combine")
-
-        files_combined = 0
-        for f in files_to_combine:
-            new_data = CoverageData(debug=self.debug)
-            try:
-                new_data.read_file(f)
-            except CoverageException as exc:
-                if self.warn:
-                    # The CoverageException has the file name in it, so just
-                    # use the message as the warning.
-                    self.warn(str(exc))
-            else:
-                data.update(new_data, aliases=aliases)
-                files_combined += 1
-                if self.debug and self.debug.should('dataio'):
-                    self.debug.write("Deleting combined data file %r" % (f,))
-                file_be_gone(f)
-
-        if strict and not files_combined:
-            raise CoverageException("No usable data files")
+    if strict and not files_to_combine:
+        raise CoverageException("No data to combine")
+
+    files_combined = 0
+    for f in files_to_combine:
+        try:
+            new_data = CoverageData(f, debug=data._debug)
+            new_data.read()
+        except CoverageException as exc:
+            if data._warn:
+                # The CoverageException has the file name in it, so just
+                # use the message as the warning.
+                data._warn(str(exc))
+        else:
+            data.update(new_data, aliases=aliases)
+            files_combined += 1
+            if data._debug and data._debug.should('dataio'):
+                data._debug.write("Deleting combined data file %r" % (f,))
+            file_be_gone(f)
+
+    if strict and not files_combined:
+        raise CoverageException("No usable data files")


 def canonicalize_json_data(data):
     """Canonicalize our JSON data so it can be compared."""
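The data.py changes fold the old `CoverageDataFiles` class into `CoverageData` itself: the data object now knows its file name from construction time, `read()` and `write()` replace the `read_file`/`write_file`/`*_fileobj` methods in the public API, `erase()` grows the `parallel` flag, and `line_counts`, `add_data_to_hash`, and `combine_parallel_data` become module-level functions. A rough sketch of the new surface (file names are illustrative):

    from coverage.data import CoverageData, combine_parallel_data, line_counts

    data = CoverageData(basename=".coverage", suffix=None, warn=print, debug=None)
    data.read()       # fine if the file doesn't exist yet; nothing is read

    # Merge any .coverage.* files from parallel runs into this object.
    combine_parallel_data(data)

    data.write()      # writes to the file named at construction time
    print(line_counts(data))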
""" + if self.should('self'): + caller_self = inspect.stack()[1][0].f_locals.get('self') + if caller_self is not None: + msg = "[self: {!r}] {}".format(caller_self, msg) self.output.write(msg+"\n") if self.should('callers'): dump_stack_frames(out=self.output, skip=1) @@ -167,6 +170,17 @@ def add_pid_and_tid(text): return text +class SimpleRepr(object): + """A mixin implementing a simple __repr__.""" + def __repr__(self): + show_attrs = ((k, v) for k, v in self.__dict__.items() if getattr(v, "show_repr_attr", True)) + return "<{klass} @0x{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self), + attrs=" ".join("{}={!r}".format(k, v) for k, v in show_attrs), + ) + + def filter_text(text, filters): """Run `text` through a series of filters. diff --git a/coverage/html.py b/coverage/html.py index 186e9d22..bb519254 100644 --- a/coverage/html.py +++ b/coverage/html.py @@ -12,6 +12,7 @@ import shutil import coverage from coverage import env from coverage.backward import iitems +from coverage.data import add_data_to_hash from coverage.files import flat_rootname from coverage.misc import CoverageException, file_be_gone, Hasher, isolate_module from coverage.report import Reporter @@ -67,7 +68,7 @@ def read_data(fname): def write_html(fname, html): """Write `html` to `fname`, properly encoded.""" - html = re.sub(r"(\A\s+)|(\s+$)", "", html, flags=re.MULTILINE) + html = re.sub(r"(\A\s+)|(\s+$)", "", html, flags=re.MULTILINE) + "\n" with open(fname, "wb") as fout: fout.write(html.encode('ascii', 'xmlcharrefreplace')) @@ -169,7 +170,7 @@ class HtmlReporter(Reporter): """Compute a hash that changes if the file needs to be re-reported.""" m = Hasher() m.update(source) - self.data.add_to_hash(fr.filename, m) + add_data_to_hash(self.data, fr.filename, m) return m.hexdigest() def html_file(self, fr, analysis): diff --git a/coverage/misc.py b/coverage/misc.py index 50c8d8ac..ab950846 100644 --- a/coverage/misc.py +++ b/coverage/misc.py @@ -250,16 +250,6 @@ def _needs_to_implement(that, func_name): ) -class SimpleRepr(object): - """A mixin implementing a simple __repr__.""" - def __repr__(self): - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) - - class BaseCoverageException(Exception): """The base of all Coverage exceptions.""" pass diff --git a/coverage/results.py b/coverage/results.py index 7e3bd268..fb919c9b 100644 --- a/coverage/results.py +++ b/coverage/results.py @@ -6,7 +6,8 @@ import collections from coverage.backward import iitems -from coverage.misc import contract, format_lines, SimpleRepr +from coverage.debug import SimpleRepr +from coverage.misc import contract, format_lines class Analysis(object): diff --git a/coverage/sqldata.py b/coverage/sqldata.py new file mode 100644 index 00000000..f92e245b --- /dev/null +++ b/coverage/sqldata.py @@ -0,0 +1,435 @@ +# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt + +"""Sqlite coverage data.""" + +# TODO: get sys_info for data class, so we can see sqlite version etc +# TODO: get rid of skip_unless_data_storage_is_json +# TODO: get rid of "JSON message" and "SQL message" in the tests +# TODO: check the schema +# TODO: get rid of the application_id? +# TODO: factor out dataop debugging to a wrapper class? +# TODO: make sure all dataop debugging is in place somehow +# TODO: should writes be batched? 
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
new file mode 100644
index 00000000..f92e245b
--- /dev/null
+++ b/coverage/sqldata.py
@@ -0,0 +1,435 @@
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
+
+"""Sqlite coverage data."""
+
+# TODO: get sys_info for data class, so we can see sqlite version etc
+# TODO: get rid of skip_unless_data_storage_is_json
+# TODO: get rid of "JSON message" and "SQL message" in the tests
+# TODO: check the schema
+# TODO: get rid of the application_id?
+# TODO: factor out dataop debugging to a wrapper class?
+# TODO: make sure all dataop debugging is in place somehow
+# TODO: should writes be batched?
+# TODO: settle the os.fork question
+# TODO: run_info
+
+import glob
+import os
+import sqlite3
+import struct
+
+from coverage.backward import iitems
+from coverage.data import filename_suffix
+from coverage.debug import SimpleRepr
+from coverage.files import PathAliases
+from coverage.misc import CoverageException, file_be_gone
+
+
+SCHEMA = """
+create table schema (
+    version integer
+);
+
+insert into schema (version) values (1);
+
+create table meta (
+    has_lines boolean,
+    has_arcs boolean
+);
+
+create table file (
+    id integer primary key,
+    path text,
+    unique(path)
+);
+
+create table line (
+    file_id integer,
+    lineno integer,
+    unique(file_id, lineno)
+);
+
+create table arc (
+    file_id integer,
+    fromno integer,
+    tono integer,
+    unique(file_id, fromno, tono)
+);
+
+create table tracer (
+    file_id integer primary key,
+    tracer text
+);
+"""
+
+APP_ID = 0xc07e8a6e     # "coverage", kind of.
+
+def unsigned_to_signed(val):
+    return struct.unpack('>i', struct.pack('>I', val))[0]
+
+def signed_to_unsigned(val):
+    return struct.unpack('>I', struct.pack('>i', val))[0]
+
+class CoverageSqliteData(SimpleRepr):
+    def __init__(self, basename=None, suffix=None, warn=None, debug=None):
+        self.filename = os.path.abspath(basename or ".coverage")
+        suffix = filename_suffix(suffix)
+        if suffix:
+            self.filename += "." + suffix
+        self._warn = warn
+        self._debug = debug
+
+        self._file_map = {}
+        self._db = None
+        # Are we in sync with the data file?
+        self._have_used = False
+
+        self._has_lines = False
+        self._has_arcs = False
+
+    def _reset(self):
+        self._file_map = {}
+        if self._db is not None:
+            self._db.close()
+        self._db = None
+        self._have_used = False
+
+    def _create_db(self):
+        if self._debug and self._debug.should('dataio'):
+            self._debug.write("Creating data file {!r}".format(self.filename))
+        self._db = Sqlite(self.filename, self._debug)
+        with self._db:
+            self._db.execute("pragma application_id = {}".format(unsigned_to_signed(APP_ID)))
+            for stmt in SCHEMA.split(';'):
+                stmt = stmt.strip()
+                if stmt:
+                    self._db.execute(stmt)
+            self._db.execute(
+                "insert into meta (has_lines, has_arcs) values (?, ?)",
+                (self._has_lines, self._has_arcs)
+            )
+
+    def _open_db(self):
+        if self._debug and self._debug.should('dataio'):
+            self._debug.write("Opening data file {!r}".format(self.filename))
+        self._db = Sqlite(self.filename, self._debug)
+        with self._db:
+            for app_id, in self._db.execute("pragma application_id"):
+                app_id = signed_to_unsigned(int(app_id))
+                if app_id != APP_ID:
+                    raise CoverageException(
+                        "Couldn't use {!r}: wrong application_id: "
+                        "0x{:08x} != 0x{:08x}".format(self.filename, app_id, APP_ID)
+                    )
+            for row in self._db.execute("select has_lines, has_arcs from meta"):
+                self._has_lines, self._has_arcs = row
+
+            for path, id in self._db.execute("select path, id from file"):
+                self._file_map[path] = id
+
+    def _connect(self):
+        if self._db is None:
+            if os.path.exists(self.filename):
+                self._open_db()
+            else:
+                self._create_db()
+        return self._db
+
+    def __nonzero__(self):
+        try:
+            with self._connect() as con:
+                rows = con.execute("select * from file limit 1")
+                return bool(list(rows))
+        except CoverageException:
+            return False
+
+    __bool__ = __nonzero__
+
+    def _file_id(self, filename, add=False):
+        """Get the file id for `filename`.
+
+        If filename is not in the database yet, add if it `add` is True.
+        If `add` is not True, return None.
+        """
+        if filename not in self._file_map:
+            if add:
+                with self._connect() as con:
+                    cur = con.execute("insert into file (path) values (?)", (filename,))
+                    self._file_map[filename] = cur.lastrowid
+        return self._file_map.get(filename)
+
+    def add_lines(self, line_data):
+        """Add measured line data.
+
+        `line_data` is a dictionary mapping file names to dictionaries::
+
+            { filename: { lineno: None, ... }, ...}
+
+        """
+        if self._debug and self._debug.should('dataop'):
+            self._debug.write("Adding lines: %d files, %d lines total" % (
+                len(line_data), sum(len(lines) for lines in line_data.values())
+            ))
+        self._start_using()
+        self._choose_lines_or_arcs(lines=True)
+        with self._connect() as con:
+            for filename, linenos in iitems(line_data):
+                file_id = self._file_id(filename, add=True)
+                data = [(file_id, lineno) for lineno in linenos]
+                con.executemany(
+                    "insert or ignore into line (file_id, lineno) values (?, ?)",
+                    data,
+                )
+
+    def add_arcs(self, arc_data):
+        """Add measured arc data.
+
+        `arc_data` is a dictionary mapping file names to dictionaries::
+
+            { filename: { (l1,l2): None, ... }, ...}
+
+        """
+        if self._debug and self._debug.should('dataop'):
+            self._debug.write("Adding arcs: %d files, %d arcs total" % (
+                len(arc_data), sum(len(arcs) for arcs in arc_data.values())
+            ))
+        self._start_using()
+        self._choose_lines_or_arcs(arcs=True)
+        with self._connect() as con:
+            for filename, arcs in iitems(arc_data):
+                file_id = self._file_id(filename, add=True)
+                data = [(file_id, fromno, tono) for fromno, tono in arcs]
+                con.executemany(
+                    "insert or ignore into arc (file_id, fromno, tono) values (?, ?, ?)",
+                    data,
+                )
+
+    def _choose_lines_or_arcs(self, lines=False, arcs=False):
+        if lines and self._has_arcs:
+            raise CoverageException("Can't add lines to existing arc data")
+        if arcs and self._has_lines:
+            raise CoverageException("Can't add arcs to existing line data")
+        if not self._has_arcs and not self._has_lines:
+            self._has_lines = lines
+            self._has_arcs = arcs
+            with self._connect() as con:
+                con.execute("update meta set has_lines = ?, has_arcs = ?", (lines, arcs))
+
+    def add_file_tracers(self, file_tracers):
+        """Add per-file plugin information.
+
+        `file_tracers` is { filename: plugin_name, ... }
+
+        """
+        self._start_using()
+        with self._connect() as con:
+            for filename, plugin_name in iitems(file_tracers):
+                file_id = self._file_id(filename)
+                if file_id is None:
+                    raise CoverageException(
+                        "Can't add file tracer data for unmeasured file '%s'" % (filename,)
+                    )
+
+                existing_plugin = self.file_tracer(filename)
+                if existing_plugin:
+                    if existing_plugin != plugin_name:
+                        raise CoverageException(
+                            "Conflicting file tracer name for '%s': %r vs %r" % (
+                                filename, existing_plugin, plugin_name,
+                            )
+                        )
+                elif plugin_name:
+                    con.execute(
+                        "insert into tracer (file_id, tracer) values (?, ?)",
+                        (file_id, plugin_name)
+                    )
+
+    def touch_file(self, filename, plugin_name=""):
+        """Ensure that `filename` appears in the data, empty if needed.
+
+        `plugin_name` is the name of the plugin resposible for this file. It is used
+        to associate the right filereporter, etc.
+        """
+        self._start_using()
+        if self._debug and self._debug.should('dataop'):
+            self._debug.write("Touching %r" % (filename,))
+        if not self._has_arcs and not self._has_lines:
+            raise CoverageException("Can't touch files in an empty CoverageSqliteData")
+
+        self._file_id(filename, add=True)
+        if plugin_name:
+            # Set the tracer for this file
+            self.add_file_tracers({filename: plugin_name})
+
+    def update(self, other_data, aliases=None):
+        if self._has_lines and other_data._has_arcs:
+            raise CoverageException("Can't combine arc data with line data")
+        if self._has_arcs and other_data._has_lines:
+            raise CoverageException("Can't combine line data with arc data")
+
+        aliases = aliases or PathAliases()
+
+        # See what we had already measured, for accurate conflict reporting.
+        this_measured = set(self.measured_files())
+
+        # lines
+        if other_data._has_lines:
+            for filename in other_data.measured_files():
+                lines = set(other_data.lines(filename))
+                filename = aliases.map(filename)
+                lines.update(self.lines(filename) or ())
+                self.add_lines({filename: lines})
+
+        # arcs
+        if other_data._has_arcs:
+            for filename in other_data.measured_files():
+                arcs = set(other_data.arcs(filename))
+                filename = aliases.map(filename)
+                arcs.update(self.arcs(filename) or ())
+                self.add_arcs({filename: arcs})
+
+        # file_tracers
+        for filename in other_data.measured_files():
+            other_plugin = other_data.file_tracer(filename)
+            filename = aliases.map(filename)
+            if filename in this_measured:
+                this_plugin = self.file_tracer(filename)
+            else:
+                this_plugin = None
+            if this_plugin is None:
+                self.add_file_tracers({filename: other_plugin})
+            elif this_plugin != other_plugin:
+                raise CoverageException(
+                    "Conflicting file tracer name for '%s': %r vs %r" % (
+                        filename, this_plugin, other_plugin,
+                    )
+                )
+
+    def erase(self, parallel=False):
+        """Erase the data in this object.
+
+        If `parallel` is true, then also deletes data files created from the
+        basename by parallel-mode.
+
+        """
+        self._reset()
+        if self._debug and self._debug.should('dataio'):
+            self._debug.write("Erasing data file {!r}".format(self.filename))
+        file_be_gone(self.filename)
+        if parallel:
+            data_dir, local = os.path.split(self.filename)
+            localdot = local + '.*'
+            pattern = os.path.join(os.path.abspath(data_dir), localdot)
+            for filename in glob.glob(pattern):
+                if self._debug and self._debug.should('dataio'):
+                    self._debug.write("Erasing parallel data file {!r}".format(filename))
+                file_be_gone(filename)
+
+    def read(self):
+        self._connect()     # TODO: doesn't look right
+        self._have_used = True
+
+    def write(self):
+        """Write the collected coverage data to a file."""
+        pass
+
+    def _start_using(self):
+        if not self._have_used:
+            self.erase()
+        self._have_used = True
+
+    def has_arcs(self):
+        return bool(self._has_arcs)
+
+    def measured_files(self):
+        """A list of all files that had been measured."""
+        return list(self._file_map)
+
+    def file_tracer(self, filename):
+        """Get the plugin name of the file tracer for a file.
+
+        Returns the name of the plugin that handles this file.  If the file was
+        measured, but didn't use a plugin, then "" is returned.  If the file
+        was not measured, then None is returned.
+
+        """
+        self._start_using()
+        with self._connect() as con:
+            file_id = self._file_id(filename)
+            if file_id is None:
+                return None
+            row = con.execute("select tracer from tracer where file_id = ?", (file_id,)).fetchone()
+            if row is not None:
+                return row[0] or ""
+            return ""   # File was measured, but no tracer associated.
+
+    def lines(self, filename):
+        self._start_using()
+        if self.has_arcs():
+            arcs = self.arcs(filename)
+            if arcs is not None:
+                import itertools
+                all_lines = itertools.chain.from_iterable(arcs)
+                return list(set(l for l in all_lines if l > 0))
+
+        with self._connect() as con:
+            file_id = self._file_id(filename)
+            if file_id is None:
+                return None
+            else:
+                linenos = con.execute("select lineno from line where file_id = ?", (file_id,))
+                return [lineno for lineno, in linenos]
+
+    def arcs(self, filename):
+        self._start_using()
+        with self._connect() as con:
+            file_id = self._file_id(filename)
+            if file_id is None:
+                return None
+            else:
+                arcs = con.execute("select fromno, tono from arc where file_id = ?", (file_id,))
+                return [pair for pair in arcs]
+
+    def run_infos(self):
+        return []   # TODO
+
+
+class Sqlite(SimpleRepr):
+    def __init__(self, filename, debug):
+        self.debug = debug if (debug and debug.should('sql')) else None
+        if self.debug:
+            self.debug.write("Connecting to {!r}".format(filename))
+        self.filename = filename
+        self.con = sqlite3.connect(self.filename)
+
+        # This pragma makes writing faster. It disables rollbacks, but we never need them.
+        # PyPy needs the .close() calls here, or sqlite gets twisted up:
+        # https://bitbucket.org/pypy/pypy/issues/2872/default-isolation-mode-is-different-on
+        self.execute("pragma journal_mode=off").close()
+        # This pragma makes writing faster.
+        self.execute("pragma synchronous=off").close()
+
+    def close(self):
+        self.con.close()
+
+    def __enter__(self):
+        self.con.__enter__()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        return self.con.__exit__(exc_type, exc_value, traceback)
+
+    def execute(self, sql, parameters=()):
+        if self.debug:
+            tail = " with {!r}".format(parameters) if parameters else ""
+            self.debug.write("Executing {!r}{}".format(sql, tail))
+        try:
+            return self.con.execute(sql, parameters)
+        except sqlite3.Error as exc:
+            raise CoverageException("Couldn't use data file {!r}: {}".format(self.filename, exc))
+
+    def executemany(self, sql, data):
+        if self.debug:
+            self.debug.write("Executing many {!r} with {} rows".format(sql, len(data)))
+        return self.con.executemany(sql, data)
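One subtlety in sqldata.py: SQLite's `application_id` pragma stores a signed 32-bit integer, which is why `APP_ID` is pushed through the `struct`-based conversions above. A quick check of the round-trip, plus a peek inside a data file using only the standard `sqlite3` module (the file name is illustrative, and assumes the SQL backend wrote it):

    import sqlite3
    import struct

    APP_ID = 0xc07e8a6e     # same constant as sqldata.py

    def unsigned_to_signed(val):
        return struct.unpack('>i', struct.pack('>I', val))[0]

    def signed_to_unsigned(val):
        return struct.unpack('>I', struct.pack('>i', val))[0]

    # The round-trip is lossless: 0xc07e8a6e -> -1065448850 -> 0xc07e8a6e.
    assert signed_to_unsigned(unsigned_to_signed(APP_ID)) == APP_ID

    con = sqlite3.connect(".coverage")
    for app_id, in con.execute("pragma application_id"):
        print("application_id: 0x%08x" % signed_to_unsigned(int(app_id)))
    for path, in con.execute("select path from file order by path"):
        print(path)
    con.close()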