Diffstat (limited to 'coverage/sqldata.py')
-rw-r--r--  coverage/sqldata.py  535
1 file changed, 535 insertions, 0 deletions
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
new file mode 100644
index 00000000..e9ccbede
--- /dev/null
+++ b/coverage/sqldata.py
@@ -0,0 +1,535 @@
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
+
+"""Sqlite coverage data."""
+
+# TODO: get sys_info for data class, so we can see sqlite version etc
+# TODO: get rid of skip_unless_data_storage_is
+# TODO: get rid of "JSON message" and "SQL message" in the tests
+# TODO: factor out dataop debugging to a wrapper class?
+# TODO: make sure all dataop debugging is in place somehow
+# TODO: should writes be batched?
+# TODO: run_info
+
+import glob
+import itertools
+import os
+import sqlite3
+
+from coverage.backward import iitems
+from coverage.data import filename_suffix
+from coverage.debug import SimpleReprMixin
+from coverage.files import PathAliases
+from coverage.misc import CoverageException, file_be_gone
+
+
+# Schema versions:
+# 1: Released in 5.0a2
+# 2: Added contexts
+
+SCHEMA_VERSION = 2
+
+SCHEMA = """
+create table coverage_schema (
+ version integer
+);
+
+create table meta (
+ has_lines boolean,
+ has_arcs boolean
+);
+
+create table file (
+ id integer primary key,
+ path text,
+ unique(path)
+);
+
+create table context (
+ id integer primary key,
+ context text,
+ unique(context)
+);
+
+create table line (
+ file_id integer,
+ context_id integer,
+ lineno integer,
+ unique(file_id, context_id, lineno)
+);
+
+create table arc (
+ file_id integer,
+ context_id integer,
+ fromno integer,
+ tono integer,
+ unique(file_id, context_id, fromno, tono)
+);
+
+create table tracer (
+ file_id integer primary key,
+ tracer text
+);
+"""
+
+
+class CoverageSqliteData(SimpleReprMixin):
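+    """Coverage data stored in a SQLite database."""
+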
+ def __init__(self, basename=None, suffix=None, warn=None, debug=None):
+ self._basename = os.path.abspath(basename or ".coverage")
+ self._suffix = suffix
+ self._warn = warn
+ self._debug = debug
+
+ self._choose_filename()
+ self._file_map = {}
+ self._db = None
+ self._pid = os.getpid()
+
+ # Are we in sync with the data file?
+ self._have_used = False
+
+ self._has_lines = False
+ self._has_arcs = False
+
+ self._current_context_id = None
+
+ def _choose_filename(self):
+ self.filename = self._basename
+ suffix = filename_suffix(self._suffix)
+ if suffix:
+ self.filename += "." + suffix
+
+ def _reset(self):
+ if self._db is not None:
+ self._db.close()
+ self._db = None
+ self._file_map = {}
+ self._have_used = False
+ self._current_context_id = None
+
+ def _create_db(self):
+ if self._debug and self._debug.should('dataio'):
+ self._debug.write("Creating data file {!r}".format(self.filename))
+ self._db = Sqlite(self.filename, self._debug)
+ with self._db:
+ for stmt in SCHEMA.split(';'):
+ stmt = " ".join(stmt.strip().split())
+ if stmt:
+ self._db.execute(stmt)
+ self._db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
+ self._db.execute(
+ "insert into meta (has_lines, has_arcs) values (?, ?)",
+ (self._has_lines, self._has_arcs)
+ )
+
+ def _open_db(self):
+ if self._debug and self._debug.should('dataio'):
+ self._debug.write("Opening data file {!r}".format(self.filename))
+ self._db = Sqlite(self.filename, self._debug)
+ with self._db:
+ try:
+ schema_version, = self._db.execute("select version from coverage_schema").fetchone()
+ except Exception as exc:
+ raise CoverageException(
+ "Data file {!r} doesn't seem to be a coverage data file: {}".format(
+ self.filename, exc
+ )
+ )
+ else:
+ if schema_version != SCHEMA_VERSION:
+ raise CoverageException(
+ "Couldn't use data file {!r}: wrong schema: {} instead of {}".format(
+ self.filename, schema_version, SCHEMA_VERSION
+ )
+ )
+
+ for row in self._db.execute("select has_lines, has_arcs from meta"):
+ self._has_lines, self._has_arcs = row
+
+            for path, file_id in self._db.execute("select path, id from file"):
+                self._file_map[path] = file_id
+
+ def _connect(self):
+ if self._db is None:
+ if os.path.exists(self.filename):
+ self._open_db()
+ else:
+ self._create_db()
+ return self._db
+
+ def __nonzero__(self):
+ if self._db is None and not os.path.exists(self.filename):
+ return False
+ try:
+ with self._connect() as con:
+ rows = con.execute("select * from file limit 1")
+ return bool(list(rows))
+ except CoverageException:
+ return False
+
+ __bool__ = __nonzero__
+
+ def dump(self): # pragma: debugging
+ """Write a dump of the database."""
+ if self._debug:
+ with self._connect() as con:
+ self._debug.write(con.dump())
+
+ def _file_id(self, filename, add=False):
+ """Get the file id for `filename`.
+
+        If `filename` is not in the database yet, add it if `add` is True.
+ If `add` is not True, return None.
+ """
+ if filename not in self._file_map:
+ if add:
+ with self._connect() as con:
+ cur = con.execute("insert into file (path) values (?)", (filename,))
+ self._file_map[filename] = cur.lastrowid
+ return self._file_map.get(filename)
+
+ def _context_id(self, context):
+ """Get the id for a context."""
+ assert context is not None
+ self._start_using()
+ with self._connect() as con:
+ row = con.execute("select id from context where context = ?", (context,)).fetchone()
+ if row is not None:
+ return row[0]
+ else:
+ return None
+
+ def set_context(self, context):
+ """Set the current context for future `add_lines` etc."""
+ if self._debug and self._debug.should('dataop'):
+ self._debug.write("Setting context: %r" % (context,))
+ self._start_using()
+ context = context or ""
+ with self._connect() as con:
+ row = con.execute("select id from context where context = ?", (context,)).fetchone()
+ if row is not None:
+ self._current_context_id = row[0]
+ else:
+ cur = con.execute("insert into context (context) values (?)", (context,))
+ self._current_context_id = cur.lastrowid
+
+ def add_lines(self, line_data):
+ """Add measured line data.
+
+ `line_data` is a dictionary mapping file names to dictionaries::
+
+ { filename: { lineno: None, ... }, ...}
+
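+        For example (the file name and line numbers are hypothetical)::
+
+            data.add_lines({"file1.py": {17: None, 23: None}})
+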
+ """
+ if self._debug and self._debug.should('dataop'):
+ self._debug.write("Adding lines: %d files, %d lines total" % (
+ len(line_data), sum(len(lines) for lines in line_data.values())
+ ))
+ self._start_using()
+ self._choose_lines_or_arcs(lines=True)
+ if self._current_context_id is None:
+ self.set_context("")
+ with self._connect() as con:
+ for filename, linenos in iitems(line_data):
+ file_id = self._file_id(filename, add=True)
+ data = [(file_id, self._current_context_id, lineno) for lineno in linenos]
+ con.executemany(
+ "insert or ignore into line (file_id, context_id, lineno) values (?, ?, ?)",
+ data,
+ )
+
+ def add_arcs(self, arc_data):
+ """Add measured arc data.
+
+ `arc_data` is a dictionary mapping file names to dictionaries::
+
+ { filename: { (l1,l2): None, ... }, ...}
+
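+        For example (hypothetical data; negative numbers mark entry and exit
+        arcs)::
+
+            data.add_arcs({"file1.py": {(-1, 17): None, (17, 23): None}})
+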
+ """
+ if self._debug and self._debug.should('dataop'):
+ self._debug.write("Adding arcs: %d files, %d arcs total" % (
+ len(arc_data), sum(len(arcs) for arcs in arc_data.values())
+ ))
+ self._start_using()
+ self._choose_lines_or_arcs(arcs=True)
+ if self._current_context_id is None:
+ self.set_context("")
+ with self._connect() as con:
+ for filename, arcs in iitems(arc_data):
+ file_id = self._file_id(filename, add=True)
+ data = [(file_id, self._current_context_id, fromno, tono) for fromno, tono in arcs]
+ con.executemany(
+ "insert or ignore into arc (file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
+ data,
+ )
+
+ def _choose_lines_or_arcs(self, lines=False, arcs=False):
+ if lines and self._has_arcs:
+ raise CoverageException("Can't add lines to existing arc data")
+ if arcs and self._has_lines:
+ raise CoverageException("Can't add arcs to existing line data")
+ if not self._has_arcs and not self._has_lines:
+ self._has_lines = lines
+ self._has_arcs = arcs
+ with self._connect() as con:
+ con.execute("update meta set has_lines = ?, has_arcs = ?", (lines, arcs))
+
+ def add_file_tracers(self, file_tracers):
+ """Add per-file plugin information.
+
+ `file_tracers` is { filename: plugin_name, ... }
+
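+        For example (the plugin name is hypothetical)::
+
+            data.add_file_tracers({"file1.py": "mypackage.myplugin"})
+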
+ """
+ self._start_using()
+ with self._connect() as con:
+ for filename, plugin_name in iitems(file_tracers):
+ file_id = self._file_id(filename)
+ if file_id is None:
+ raise CoverageException(
+ "Can't add file tracer data for unmeasured file '%s'" % (filename,)
+ )
+
+ existing_plugin = self.file_tracer(filename)
+ if existing_plugin:
+ if existing_plugin != plugin_name:
+ raise CoverageException(
+ "Conflicting file tracer name for '%s': %r vs %r" % (
+ filename, existing_plugin, plugin_name,
+ )
+ )
+ elif plugin_name:
+ con.execute(
+ "insert into tracer (file_id, tracer) values (?, ?)",
+ (file_id, plugin_name)
+ )
+
+ def touch_file(self, filename, plugin_name=""):
+ """Ensure that `filename` appears in the data, empty if needed.
+
+        `plugin_name` is the name of the plugin responsible for this file. It is used
+ to associate the right filereporter, etc.
+ """
+ self._start_using()
+ if self._debug and self._debug.should('dataop'):
+ self._debug.write("Touching %r" % (filename,))
+ if not self._has_arcs and not self._has_lines:
+ raise CoverageException("Can't touch files in an empty CoverageSqliteData")
+
+ self._file_id(filename, add=True)
+ if plugin_name:
+ # Set the tracer for this file
+ self.add_file_tracers({filename: plugin_name})
+
+ def update(self, other_data, aliases=None):
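+        """Update this data with data from another CoverageSqliteData.
+
+        If `aliases` is provided, it's a `PathAliases` object that maps file
+        paths from `other_data` to paths on this machine.
+        """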
+ if self._has_lines and other_data._has_arcs:
+ raise CoverageException("Can't combine arc data with line data")
+ if self._has_arcs and other_data._has_lines:
+ raise CoverageException("Can't combine line data with arc data")
+
+ aliases = aliases or PathAliases()
+
+ # See what we had already measured, for accurate conflict reporting.
+ this_measured = self.measured_files()
+
+ # lines
+ if other_data._has_lines:
+ for context in other_data.measured_contexts():
+ self.set_context(context)
+ for filename in other_data.measured_files():
+ lines = set(other_data.lines(filename, context=context))
+ filename = aliases.map(filename)
+ lines.update(self.lines(filename, context=context) or ())
+ self.add_lines({filename: lines})
+
+ # arcs
+ if other_data._has_arcs:
+ for context in other_data.measured_contexts():
+ self.set_context(context)
+ for filename in other_data.measured_files():
+ arcs = set(other_data.arcs(filename, context=context))
+ filename = aliases.map(filename)
+ arcs.update(self.arcs(filename, context=context) or ())
+ self.add_arcs({filename: arcs})
+
+ # file_tracers
+ for filename in other_data.measured_files():
+ other_plugin = other_data.file_tracer(filename)
+ filename = aliases.map(filename)
+ if filename in this_measured:
+ this_plugin = self.file_tracer(filename)
+ else:
+ this_plugin = None
+ if this_plugin is None:
+ self.add_file_tracers({filename: other_plugin})
+ elif this_plugin != other_plugin:
+ raise CoverageException(
+ "Conflicting file tracer name for '%s': %r vs %r" % (
+ filename, this_plugin, other_plugin,
+ )
+ )
+
+ def erase(self, parallel=False):
+ """Erase the data in this object.
+
+        If `parallel` is true, also delete data files created from the
+        basename by parallel-mode.
+
+ """
+ self._reset()
+ if self._debug and self._debug.should('dataio'):
+ self._debug.write("Erasing data file {!r}".format(self.filename))
+ file_be_gone(self.filename)
+ if parallel:
+ data_dir, local = os.path.split(self.filename)
+ localdot = local + '.*'
+ pattern = os.path.join(os.path.abspath(data_dir), localdot)
+ for filename in glob.glob(pattern):
+ if self._debug and self._debug.should('dataio'):
+ self._debug.write("Erasing parallel data file {!r}".format(filename))
+ file_be_gone(filename)
+
+ def read(self):
+ with self._connect(): # TODO: doesn't look right
+ self._have_used = True
+
+ def write(self):
+ """Write the collected coverage data to a file."""
+ pass
+
+ def _start_using(self):
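+        """Call this before using the database at all."""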
+ if self._pid != os.getpid():
+ # Looks like we forked! Have to start a new data file.
+ self._reset()
+ self._choose_filename()
+ self._pid = os.getpid()
+ if not self._have_used:
+ self.erase()
+ self._have_used = True
+
+ def has_arcs(self):
+ return bool(self._has_arcs)
+
+ def measured_files(self):
+ """A set of all files that had been measured."""
+ return set(self._file_map)
+
+ def measured_contexts(self):
+ """A set of all contexts that have been measured."""
+ self._start_using()
+ with self._connect() as con:
+ contexts = set(row[0] for row in con.execute("select distinct(context) from context"))
+ return contexts
+
+ def file_tracer(self, filename):
+ """Get the plugin name of the file tracer for a file.
+
+ Returns the name of the plugin that handles this file. If the file was
+ measured, but didn't use a plugin, then "" is returned. If the file
+ was not measured, then None is returned.
+
+ """
+ self._start_using()
+ with self._connect() as con:
+ file_id = self._file_id(filename)
+ if file_id is None:
+ return None
+ row = con.execute("select tracer from tracer where file_id = ?", (file_id,)).fetchone()
+ if row is not None:
+ return row[0] or ""
+ return "" # File was measured, but no tracer associated.
+
+ def lines(self, filename, context=None):
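+        """Get the measured line numbers for `filename`.
+
+        Returns a list of line numbers, or None if the file was not measured.
+        """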
+ self._start_using()
+ if self.has_arcs():
+ arcs = self.arcs(filename, context=context)
+ if arcs is not None:
+ all_lines = itertools.chain.from_iterable(arcs)
+ return list(set(l for l in all_lines if l > 0))
+
+ with self._connect() as con:
+ file_id = self._file_id(filename)
+ if file_id is None:
+ return None
+ else:
+ query = "select lineno from line where file_id = ?"
+ data = [file_id]
+ if context is not None:
+ query += " and context_id = ?"
+ data += [self._context_id(context)]
+ linenos = con.execute(query, data)
+ return [lineno for lineno, in linenos]
+
+ def arcs(self, filename, context=None):
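+        """Get the measured arcs for `filename`.
+
+        Returns a list of (fromno, tono) pairs, or None if the file was not
+        measured.
+        """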
+ self._start_using()
+ with self._connect() as con:
+ file_id = self._file_id(filename)
+ if file_id is None:
+ return None
+ else:
+ query = "select fromno, tono from arc where file_id = ?"
+ data = [file_id]
+ if context is not None:
+ query += " and context_id = ?"
+ data += [self._context_id(context)]
+ arcs = con.execute(query, data)
+ return list(arcs)
+
+ def run_infos(self):
+ return [] # TODO
+
+
+class Sqlite(SimpleReprMixin):
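+    """A simple abstraction over a sqlite3 connection."""
+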
+ def __init__(self, filename, debug):
+ self.debug = debug if (debug and debug.should('sql')) else None
+ if self.debug:
+ self.debug.write("Connecting to {!r}".format(filename))
+ self.filename = filename
+ self.nest = 0
+
+ def connect(self):
+ # SQLite on Windows on py2 won't open a file if the filename argument
+ # has non-ascii characters in it. Opening a relative file name avoids
+ # a problem if the current directory has non-ascii.
+ filename = os.path.relpath(self.filename)
+ self.con = sqlite3.connect(filename)
+
+ # This pragma makes writing faster. It disables rollbacks, but we never need them.
+ # PyPy needs the .close() calls here, or sqlite gets twisted up:
+ # https://bitbucket.org/pypy/pypy/issues/2872/default-isolation-mode-is-different-on
+ self.execute("pragma journal_mode=off").close()
+ # This pragma makes writing faster.
+ self.execute("pragma synchronous=off").close()
+
+ def close(self):
+ self.con.close()
+
+ def __enter__(self):
+ if self.nest == 0:
+ self.connect()
+ self.con.__enter__()
+ self.nest += 1
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.nest -= 1
+ if self.nest == 0:
+ self.con.__exit__(exc_type, exc_value, traceback)
+ self.close()
+
+ def execute(self, sql, parameters=()):
+ if self.debug:
+ tail = " with {!r}".format(parameters) if parameters else ""
+ self.debug.write("Executing {!r}{}".format(sql, tail))
+ try:
+ return self.con.execute(sql, parameters)
+ except sqlite3.Error as exc:
+ raise CoverageException("Couldn't use data file {!r}: {}".format(self.filename, exc))
+
+ def executemany(self, sql, data):
+ if self.debug:
+ self.debug.write("Executing many {!r} with {} rows".format(sql, len(data)))
+ return self.con.executemany(sql, data)
+
+ def dump(self): # pragma: debugging
+ """Return a multi-line string, the dump of the database."""
+ return "\n".join(self.con.iterdump())
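+
+# A minimal usage sketch (illustrative only; the file name and line data are
+# hypothetical):
+#
+#   data = CoverageSqliteData()
+#   data.add_lines({"file1.py": {1: None, 2: None}})
+#   data.write()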