Diffstat (limited to 'coverage/sqldata.py')
-rw-r--r-- | coverage/sqldata.py | 237 |
1 file changed, 164 insertions, 73 deletions
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 8bfb04be..893f620d 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -17,7 +17,7 @@ import os
 import sqlite3
 import sys
 
-from coverage.backward import iitems
+from coverage.backward import get_thread_id, iitems
 from coverage.data import filename_suffix
 from coverage.debug import NoDebugging, SimpleReprMixin
 from coverage.files import PathAliases
@@ -84,7 +84,7 @@ class CoverageSqliteData(SimpleReprMixin):
         self._choose_filename()
         self._file_map = {}
-        self._db = None
+        self._dbs = {}
         self._pid = os.getpid()
 
         # Are we in sync with the data file?
@@ -97,71 +97,72 @@ class CoverageSqliteData(SimpleReprMixin):
         self._current_context_id = None
 
     def _choose_filename(self):
-        self.filename = self._basename
+        self._filename = self._basename
         suffix = filename_suffix(self._suffix)
         if suffix:
-            self.filename += "." + suffix
+            self._filename += "." + suffix
 
     def _reset(self):
-        if self._db is not None:
-            self._db.close()
-        self._db = None
+        if self._dbs:
+            for db in self._dbs.values():
+                db.close()
+        self._dbs = {}
         self._file_map = {}
         self._have_used = False
         self._current_context_id = None
 
     def _create_db(self):
         if self._debug.should('dataio'):
-            self._debug.write("Creating data file {!r}".format(self.filename))
-        self._db = Sqlite(self.filename, self._debug)
-        with self._db:
+            self._debug.write("Creating data file {!r}".format(self._filename))
+        self._dbs[get_thread_id()] = Sqlite(self._filename, self._debug)
+        with self._dbs[get_thread_id()] as db:
             for stmt in SCHEMA.split(';'):
                 stmt = " ".join(stmt.strip().split())
                 if stmt:
-                    self._db.execute(stmt)
-            self._db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
-            self._db.execute(
+                    db.execute(stmt)
+            db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
+            db.execute(
                 "insert into meta (has_lines, has_arcs, sys_argv) values (?, ?, ?)",
                 (self._has_lines, self._has_arcs, str(getattr(sys, 'argv', None)))
             )
 
     def _open_db(self):
         if self._debug.should('dataio'):
-            self._debug.write("Opening data file {!r}".format(self.filename))
-        self._db = Sqlite(self.filename, self._debug)
-        with self._db:
+            self._debug.write("Opening data file {!r}".format(self._filename))
+        self._dbs[get_thread_id()] = Sqlite(self._filename, self._debug)
+        with self._dbs[get_thread_id()] as db:
             try:
-                schema_version, = self._db.execute("select version from coverage_schema").fetchone()
+                schema_version, = db.execute("select version from coverage_schema").fetchone()
             except Exception as exc:
                 raise CoverageException(
                     "Data file {!r} doesn't seem to be a coverage data file: {}".format(
-                        self.filename, exc
+                        self._filename, exc
                     )
                 )
             else:
                 if schema_version != SCHEMA_VERSION:
                     raise CoverageException(
                         "Couldn't use data file {!r}: wrong schema: {} instead of {}".format(
-                            self.filename, schema_version, SCHEMA_VERSION
+                            self._filename, schema_version, SCHEMA_VERSION
                         )
                     )
-            for row in self._db.execute("select has_lines, has_arcs from meta"):
+            for row in db.execute("select has_lines, has_arcs from meta"):
                 self._has_lines, self._has_arcs = row
 
-            for path, id in self._db.execute("select path, id from file"):
+            for path, id in db.execute("select path, id from file"):
                 self._file_map[path] = id
 
     def _connect(self):
-        if self._db is None:
-            if os.path.exists(self.filename):
+        if get_thread_id() not in self._dbs:
+            if os.path.exists(self._filename):
                 self._open_db()
             else:
                 self._create_db()
-        return self._db
+        return self._dbs[get_thread_id()]
 
     def __nonzero__(self):
-        if self._db is None and not os.path.exists(self.filename):
+        if (get_thread_id() not in self._dbs and not os.path.exists(self._filename)):
             return False
         try:
             with self._connect() as con:
@@ -181,13 +182,13 @@ class CoverageSqliteData(SimpleReprMixin):
     def _file_id(self, filename, add=False):
         """Get the file id for `filename`.
 
-        If filename is not in the database yet, add if it `add` is True.
+        If filename is not in the database yet, add it if `add` is True.
         If `add` is not True, return None.
         """
         if filename not in self._file_map:
             if add:
                 with self._connect() as con:
-                    cur = con.execute("insert into file (path) values (?)", (filename,))
+                    cur = con.execute("insert or replace into file (path) values (?)", (filename,))
                     self._file_map[filename] = cur.lastrowid
         return self._file_map.get(filename)
 
@@ -220,6 +221,14 @@ class CoverageSqliteData(SimpleReprMixin):
                 cur = con.execute("insert into context (context) values (?)", (context,))
                 self._current_context_id = cur.lastrowid
 
+    def base_filename(self):
+        """The base filename for storing data."""
+        return self._basename
+
+    def filename(self):
+        """Where is the data stored?"""
+        return self._filename
+
     def add_lines(self, line_data):
         """Add measured line data.
 
@@ -326,6 +335,11 @@ class CoverageSqliteData(SimpleReprMixin):
             self.add_file_tracers({filename: plugin_name})
 
     def update(self, other_data, aliases=None):
+        """Update this data with data from several other `CoverageData` instances.
+
+        If `aliases` is provided, it's a `PathAliases` object that is used to
+        re-map paths to match the local machine's.
+        """
         if self._has_lines and other_data._has_arcs:
             raise CoverageException("Can't combine arc data with line data")
         if self._has_arcs and other_data._has_lines:
@@ -333,57 +347,134 @@ class CoverageSqliteData(SimpleReprMixin):
 
         aliases = aliases or PathAliases()
 
-        # See what we had already measured, for accurate conflict reporting.
-        this_measured = self.measured_files()
-
-        other_files = set()
-
         # Force the database we're writing to to exist before we start nesting
         # contexts.
         self._start_using()
 
-        # Start a single transaction in each file.
-        with self._connect(), other_data._connect():
-            # lines
-            if other_data._has_lines:
-                for context in other_data.measured_contexts():
-                    self.set_context(context)
-                    for filename in other_data.measured_files():
-                        lines = set(other_data.lines(filename, context=context))
-                        if lines:
-                            other_files.add(filename)
-                            filename = aliases.map(filename)
-                            lines.update(self.lines(filename, context=context) or ())
-                            self.add_lines({filename: lines})
-
-            # arcs
-            if other_data._has_arcs:
-                for context in other_data.measured_contexts():
-                    self.set_context(context)
-                    for filename in other_data.measured_files():
-                        arcs = set(other_data.arcs(filename, context=context))
-                        if arcs:
-                            other_files.add(filename)
-                            filename = aliases.map(filename)
-                            arcs.update(self.arcs(filename, context=context) or ())
-                            self.add_arcs({filename: arcs})
-
-            # file_tracers
-            for filename in other_files:
-                other_plugin = other_data.file_tracer(filename)
-                filename = aliases.map(filename)
-                if filename in this_measured:
-                    this_plugin = self.file_tracer(filename)
-                else:
-                    this_plugin = None
-                if this_plugin is None:
-                    self.add_file_tracers({filename: other_plugin})
-                elif this_plugin != other_plugin:
+        # Collector for all arcs, lines and tracers
+        other_data.read()
+        with other_data._connect() as conn:
+            # Get files data.
+            cur = conn.execute('select path from file')
+            files = {path: aliases.map(path) for (path,) in cur}
+            cur.close()
+
+            # Get contexts data.
+            cur = conn.execute('select context from context')
+            contexts = [context for (context,) in cur]
+            cur.close()
+
+            # Get arc data.
+            cur = conn.execute(
+                'select file.path, context.context, arc.fromno, arc.tono '
+                'from arc '
+                'inner join file on file.id = arc.file_id '
+                'inner join context on context.id = arc.context_id'
+            )
+            arcs = [(files[path], context, fromno, tono) for (path, context, fromno, tono) in cur]
+            cur.close()
+
+            # Get line data.
+            cur = conn.execute(
+                'select file.path, context.context, line.lineno '
+                'from line '
+                'inner join file on file.id = line.file_id '
+                'inner join context on context.id = line.context_id'
+            )
+            lines = [(files[path], context, lineno) for (path, context, lineno) in cur]
+            cur.close()
+
+            # Get tracer data.
+            cur = conn.execute(
+                'select file.path, tracer '
+                'from tracer '
+                'inner join file on file.id = tracer.file_id'
+            )
+            tracers = {files[path]: tracer for (path, tracer) in cur}
+            cur.close()
+
+        with self._connect() as conn:
+            conn.isolation_level = 'IMMEDIATE'
+
+            # Get all tracers in the DB. Files not in the tracers are assumed
+            # to have an empty string tracer. Since Sqlite does not support
+            # full outer joins, we have to make two queries to fill the
+            # dictionary.
+            this_tracers = {path: '' for path, in conn.execute('select path from file')}
+            this_tracers.update({
+                aliases.map(path): tracer
+                for path, tracer in conn.execute(
+                    'select file.path, tracer from tracer '
+                    'inner join file on file.id = tracer.file_id'
+                )
+            })
+
+            # Create all file and context rows in the DB.
+            conn.executemany(
+                'insert or ignore into file (path) values (?)',
+                ((file,) for file in files.values())
+            )
+            file_ids = {
+                path: id
+                for id, path in conn.execute('select id, path from file')
+            }
+            conn.executemany(
+                'insert or ignore into context (context) values (?)',
+                ((context,) for context in contexts)
+            )
+            context_ids = {
+                context: id
+                for id, context in conn.execute('select id, context from context')
+            }
+
+            # Prepare tracers and fail, if a conflict is found.
+            # tracer_paths is used to ensure consistency over the tracer data
+            # and tracer_map tracks the tracers to be inserted.
+            tracer_map = {}
+            for path in files.values():
+                this_tracer = this_tracers.get(path)
+                other_tracer = tracers.get(path, '')
+                # If there is no tracer, there is always the None tracer.
+                if this_tracer is not None and this_tracer != other_tracer:
                     raise CoverageException(
                         "Conflicting file tracer name for '%s': %r vs %r" % (
-                            filename, this_plugin, other_plugin,
+                            path, this_tracer, other_tracer
                         )
                     )
+                tracer_map[path] = other_tracer
+
+            # Prepare arc and line rows to be inserted by converting the file
+            # and context strings with integer ids. Then use the efficient
+            # `executemany()` to insert all rows at once.
+            arc_rows = (
+                (file_ids[file], context_ids[context], fromno, tono)
+                for file, context, fromno, tono in arcs
+            )
+            line_rows = (
+                (file_ids[file], context_ids[context], lineno)
+                for file, context, lineno in lines
+            )
+
+            self._choose_lines_or_arcs(arcs=bool(arcs), lines=bool(lines))
+
+            conn.executemany(
+                'insert or ignore into arc '
+                '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
+                arc_rows
+            )
+            conn.executemany(
+                'insert or ignore into line '
+                '(file_id, context_id, lineno) values (?, ?, ?)',
+                line_rows
+            )
+            conn.executemany(
+                'insert or ignore into tracer (file_id, tracer) values (?, ?)',
+                ((file_ids[filename], tracer) for filename, tracer in tracer_map.items())
+            )
+
+        # Update all internal cache data.
+        self._reset()
+        self.read()
 
     def erase(self, parallel=False):
         """Erase the data in this object.
@@ -394,10 +485,10 @@ class CoverageSqliteData(SimpleReprMixin):
         """
         self._reset()
        if self._debug.should('dataio'):
-            self._debug.write("Erasing data file {!r}".format(self.filename))
-        file_be_gone(self.filename)
+            self._debug.write("Erasing data file {!r}".format(self._filename))
+        file_be_gone(self._filename)
         if parallel:
-            data_dir, local = os.path.split(self.filename)
+            data_dir, local = os.path.split(self._filename)
             localdot = local + '.*'
            pattern = os.path.join(os.path.abspath(data_dir), localdot)
             for filename in glob.glob(pattern):
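
Why `_dbs` became a dictionary: Python's sqlite3 module refuses, by default, to use a connection from any thread other than the one that created it, so a single shared `self._db` breaks as soon as measurement runs on several threads. `get_thread_id()` (newly imported from `coverage.backward` above) keys one connection per thread. A minimal sketch of the same pattern, with the hypothetical `get_conn()` standing in for `_connect()`:

    import sqlite3
    import threading

    _conns = {}  # one connection per thread ident, mirroring self._dbs

    def get_conn(path):
        # sqlite3 raises ProgrammingError if a connection crosses threads,
        # so each thread looks up (or creates) its own connection here.
        tid = threading.get_ident()
        if tid not in _conns:
            _conns[tid] = sqlite3.connect(path)
        return _conns[tid]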
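In the rewritten `update()`, `conn.isolation_level = 'IMMEDIATE'` changes how the sqlite3 module opens its implicit transaction on the destination database: it issues `BEGIN IMMEDIATE`, which claims SQLite's write lock when the transaction starts rather than deferring it until a write occurs. For a combine that reads existing rows and then writes merged ones, a competing writer is turned away up front instead of after part of the merge has run. A standalone illustration (the `combined.db` file and `demo` table are only for the example):

    import sqlite3

    conn = sqlite3.connect("combined.db")
    # 'IMMEDIATE' makes the implicit transaction open with BEGIN IMMEDIATE,
    # taking the write lock at transaction start instead of lazily.
    conn.isolation_level = 'IMMEDIATE'
    with conn:  # commits on success, rolls back on an exception
        conn.execute("create table if not exists demo (k text)")
        conn.execute("insert into demo (k) values (?)", ("merged",))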
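The two queries that fill `this_tracers` work around the limitation named in the code comment: the conflict check needs every row of `file` paired with its `tracer` row, defaulting to '' when none exists, and SQLite has no full outer join to express that directly. A dictionary filled in two passes gives the same result. `tracers_by_path()` below is a hypothetical standalone version of the trick, assuming the `file` and `tracer` tables from SCHEMA:

    def tracers_by_path(conn):
        # Pass 1: every measured file defaults to the empty-string tracer.
        result = {path: '' for (path,) in conn.execute("select path from file")}
        # Pass 2: overwrite the entries that actually have a tracer row.
        result.update(conn.execute(
            "select file.path, tracer from tracer "
            "inner join file on file.id = tracer.file_id"
        ))
        return result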
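The overall shape of the new merge is: read every path, context, arc, line and tracer row out of the source database; ensure a local row exists for each path and context via `insert or ignore`, which needs a unique constraint on those columns (as in SCHEMA) to skip duplicates while preserving existing ids; rebuild the string-to-id maps; then stream the renumbered rows in with `executemany()`. A reduced sketch of the id-mapping step for contexts alone, assuming the same `context(id, context)` table:

    def merge_contexts(conn, context_names):
        # Duplicate names hit the unique index and are silently skipped,
        # so rows that already exist keep their original ids.
        conn.executemany(
            "insert or ignore into context (context) values (?)",
            ((name,) for name in context_names),
        )
        # Re-select to build the name -> id map used when renumbering
        # the line and arc rows before their executemany() insert.
        return {name: rowid for rowid, name in
                conn.execute("select id, context from context")}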