author    | Ned Batchelder <ned@nedbatchelder.com> | 2019-07-09 16:22:51 -0400
committer | Ned Batchelder <ned@nedbatchelder.com> | 2019-07-10 06:57:24 -0400
commit    | 6b226d85f5191cd27b20ad27caded8b407772a02 (patch)
tree      | a7d2e58246fb5ef9b8128ff342e97968c4ace853 /coverage/data.py
parent    | 9bc6b93805a5f20a87211a315d00503eddab66dc (diff)
download  | python-coveragepy-git-6b226d85f5191cd27b20ad27caded8b407772a02.tar.gz
Remove the JSON data code
Diffstat (limited to 'coverage/data.py')
-rw-r--r-- | coverage/data.py | 717
1 file changed, 10 insertions, 707 deletions
diff --git a/coverage/data.py b/coverage/data.py
index bcb418b8..82bf1d41 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -1,667 +1,20 @@
 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
 
-"""Coverage data for coverage.py."""
+"""Coverage data for coverage.py.
 
-import collections
-import glob
-import itertools
-import json
-import optparse
-import os
-import os.path
-import random
-import re
-import socket
-
-from coverage import env
-from coverage.backward import iitems, string_class
-from coverage.debug import NoDebugging
-from coverage.files import PathAliases
-from coverage.misc import CoverageException, file_be_gone, isolate_module
-
-os = isolate_module(os)
-
-
-def filename_suffix(suffix):
-    if suffix is True:
-        # If data_suffix was a simple true value, then make a suffix with
-        # plenty of distinguishing information. We do this here in
-        # `save()` at the last minute so that the pid will be correct even
-        # if the process forks.
-        dice = random.Random(os.urandom(8)).randint(0, 999999)
-        suffix = "%s.%s.%06d" % (socket.gethostname(), os.getpid(), dice)
-    return suffix
-
-
-class CoverageJsonData(object):
-    """Manages collected coverage data, including file storage.
-
-    This class is the public supported API to the data coverage.py collects
-    during program execution. It includes information about what code was
-    executed. It does not include information from the analysis phase, to
-    determine what lines could have been executed, or what lines were not
-    executed.
-
-    .. note::
-
-        The file format is not documented or guaranteed. It will change in
-        the future, in possibly complicated ways. Do not read coverage.py
-        data files directly. Use this API to avoid disruption.
-
-    There are a number of kinds of data that can be collected:
-
-    * **lines**: the line numbers of source lines that were executed.
-      These are always available.
-
-    * **arcs**: pairs of source and destination line numbers for transitions
-      between source lines. These are only available if branch coverage was
-      used.
-
-    * **file tracer names**: the module names of the file tracer plugins that
-      handled each file in the data.
-
-    * **run information**: information about the program execution. This is
-      written during "coverage run", and then accumulated during "coverage
-      combine".
-
-    Lines, arcs, and file tracer names are stored for each source file. File
-    names in this API are case-sensitive, even on platforms with
-    case-insensitive file systems.
-
-    A data file is associated with the data when the :class:`CoverageData`
-    is created.
-
-    To read a coverage.py data file, use :meth:`read`. You can then
-    access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
-    or :meth:`file_tracer`. Run information is available with
-    :meth:`run_infos`.
-
-    The :meth:`has_arcs` method indicates whether arc data is available. You
-    can get a list of the files in the data with :meth:`measured_files`.
-    A summary of the line data is available from :meth:`line_counts`. As with
-    most Python containers, you can determine if there is any data at all by
-    using this object as a boolean value.
-
-    Most data files will be created by coverage.py itself, but you can use
-    methods here to create data files if you like. The :meth:`add_lines`,
-    :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
-    that are convenient for coverage.py. The :meth:`add_run_info` method adds
-    key-value pairs to the run information.
-
-    To add a source file without any measured data, use :meth:`touch_file`.
-
-    Write the data to its file with :meth:`write`.
-
-    You can clear the data in memory with :meth:`erase`. Two data collections
-    can be combined by using :meth:`update` on one :class:`CoverageData`,
-    passing it the other.
-
-    """
-
-    # The data file format is JSON, with these keys:
-    #
-    #     * lines: a dict mapping file names to lists of line numbers
-    #       executed::
-    #
-    #         { "file1": [17,23,45], "file2": [1,2,3], ... }
-    #
-    #     * arcs: a dict mapping file names to lists of line number pairs::
-    #
-    #         { "file1": [[17,23], [17,25], [25,26]], ... }
-    #
-    #     * file_tracers: a dict mapping file names to plugin names::
-    #
-    #         { "file1": "django.coverage", ... }
-    #
-    #     * runs: a list of dicts of information about the coverage.py runs
-    #       contributing to the data::
-    #
-    #         [ { "brief_sys": "CPython 2.7.10 Darwin" }, ... ]
-    #
-    # Only one of `lines` or `arcs` will be present: with branch coverage, data
-    # is stored as arcs. Without branch coverage, it is stored as lines. The
-    # line data is easily recovered from the arcs: it is all the first elements
-    # of the pairs that are greater than zero.
-
-    def __init__(self, basename=None, suffix=None, warn=None, debug=None):
-        """Create a CoverageData.
-
-        `warn` is the warning function to use.
-
-        `basename` is the name of the file to use for storing data.
-
-        `debug` is a `DebugControl` object for writing debug messages.
-
-        """
-        self._warn = warn
-        self._debug = debug or NoDebugging()
-        self.filename = os.path.abspath(basename or ".coverage")
-        self.suffix = suffix
-
-        # A map from canonical Python source file name to a dictionary in
-        # which there's an entry for each line number that has been
-        # executed:
-        #
-        #   { 'filename1.py': [12, 47, 1001], ... }
-        #
-        self._lines = None
-
-        # A map from canonical Python source file name to a dictionary with an
-        # entry for each pair of line numbers forming an arc:
-        #
-        #   { 'filename1.py': [(12,14), (47,48), ... ], ... }
-        #
-        self._arcs = None
-
-        # A map from canonical source file name to a plugin module name:
-        #
-        #   { 'filename1.py': 'django.coverage', ... }
-        #
-        self._file_tracers = {}
-
-        # A list of dicts of information about the coverage.py runs.
-        self._runs = []
-
-    def __repr__(self):
-        return "<{klass} lines={lines} arcs={arcs} tracers={tracers} runs={runs}>".format(
-            klass=self.__class__.__name__,
-            lines="None" if self._lines is None else "{{{0}}}".format(len(self._lines)),
-            arcs="None" if self._arcs is None else "{{{0}}}".format(len(self._arcs)),
-            tracers="{{{0}}}".format(len(self._file_tracers)),
-            runs="[{0}]".format(len(self._runs)),
-        )
-
-    ##
-    ## Reading data
-    ##
-
-    def set_query_contexts(self, contexts=None):
-        """Set the query contexts.
-
-        No-op, since contexts are not supported for this data format.
-        """
-        pass
-
-    def has_arcs(self):
-        """Does this data have arcs?
-
-        Arc data is only available if branch coverage was used during
-        collection.
-
-        Returns a boolean.
-
-        """
-        return self._has_arcs()
-
-    def lines(self, filename, contexts=None):
-        """Get the list of lines executed for a file.
-
-        If the file was not measured, returns None. A file might be measured,
-        and have no lines executed, in which case an empty list is returned.
-
-        If the file was executed, returns a list of integers, the line numbers
-        executed in the file. The list is in no particular order.
-
-        `contexts` is ignored, since contexts are not supported for this data
-        format.
-        """
-        if self._arcs is not None:
-            arcs = self._arcs.get(filename)
-            if arcs is not None:
-                all_lines = itertools.chain.from_iterable(arcs)
-                return list(set(l for l in all_lines if l > 0))
-        elif self._lines is not None:
-            return self._lines.get(filename)
-        return None
-
-    def arcs(self, filename, contexts=None):
-        """Get the list of arcs executed for a file.
-
-        If the file was not measured, returns None. A file might be measured,
-        and have no arcs executed, in which case an empty list is returned.
-
-        If the file was executed, returns a list of 2-tuples of integers. Each
-        pair is a starting line number and an ending line number for a
-        transition from one line to another. The list is in no particular
-        order.
-
-        Negative numbers have special meaning. If the starting line number is
-        -N, it represents an entry to the code object that starts at line N.
-        If the ending line number is -N, it's an exit from the code object that
-        starts at line N.
-
-        `contexts` is ignored, since contexts are not supported for this data
-        format.
-        """
-        if self._arcs is not None:
-            if filename in self._arcs:
-                return self._arcs[filename]
-        return None
-
-    def file_tracer(self, filename):
-        """Get the plugin name of the file tracer for a file.
-
-        Returns the name of the plugin that handles this file. If the file was
-        measured, but didn't use a plugin, then "" is returned. If the file
-        was not measured, then None is returned.
-
-        """
-        # Because the vast majority of files involve no plugin, we don't store
-        # them explicitly in self._file_tracers. Check the measured data
-        # instead to see if it was a known file with no plugin.
-        if filename in (self._arcs or self._lines or {}):
-            return self._file_tracers.get(filename, "")
-        return None
-
-    def contexts_by_lineno(self, filename):
-        return collections.defaultdict(list)
-
-    def run_infos(self):
-        """Return the list of dicts of run information.
-
-        For data collected during a single run, this will be a one-element
-        list. If data has been combined, there will be one element for each
-        original data file.
-
-        """
-        return self._runs
-
-    def measured_files(self):
-        """A set of all files that had been measured."""
-        return set(self._arcs or self._lines or {})
-
-    def __nonzero__(self):
-        return bool(self._lines or self._arcs)
-
-    __bool__ = __nonzero__
-
-    def read(self):
-        """Read the coverage data.
-
-        It is fine for the file to not exist, in which case no data is read.
-
-        """
-        if os.path.exists(self.filename):
-            self._read_file(self.filename)
-
-    def _read_fileobj(self, file_obj):
-        """Read the coverage data from the given file object.
-
-        Should only be used on an empty CoverageData object.
-
-        """
-        data = self._read_raw_data(file_obj)
-
-        self._lines = self._arcs = None
-
-        if 'lines' in data:
-            self._lines = data['lines']
-        if 'arcs' in data:
-            self._arcs = dict(
-                (fname, [tuple(pair) for pair in arcs])
-                for fname, arcs in iitems(data['arcs'])
-            )
-        self._file_tracers = data.get('file_tracers', {})
-        self._runs = data.get('runs', [])
-
-        self._validate()
-
-    def _read_file(self, filename):
-        """Read the coverage data from `filename` into this object."""
-        if self._debug.should('dataio'):
-            self._debug.write("Reading data from %r" % (filename,))
-        try:
-            with self._open_for_reading(filename) as f:
-                self._read_fileobj(f)
-        except Exception as exc:
-            raise CoverageException(
-                "Couldn't read data from '%s': %s: %s" % (
-                    filename, exc.__class__.__name__, exc,
-                )
-            )
-
-    _GO_AWAY = "!coverage.py: This is a private format, don't read it directly!"
-
-    @classmethod
-    def _open_for_reading(cls, filename):
-        """Open a file appropriately for reading data."""
-        return open(filename, "r")
-
-    @classmethod
-    def _read_raw_data(cls, file_obj):
-        """Read the raw data from a file object."""
-        go_away = file_obj.read(len(cls._GO_AWAY))
-        if go_away != cls._GO_AWAY:
-            raise CoverageException("Doesn't seem to be a coverage.py data file")
-        return json.load(file_obj)
-
-    @classmethod
-    def _read_raw_data_file(cls, filename):
-        """Read the raw data from a file, for debugging."""
-        with cls._open_for_reading(filename) as f:
-            return cls._read_raw_data(f)
+This file had the 4.x JSON data support, which is now gone. This file still
+has storage-agnostic helpers, and is kept to avoid changing too many imports.
+CoverageData is now defined in sqldata.py, and imported here to keep the
+imports working.
 
-    ##
-    ## Writing data
-    ##
-
-    def add_lines(self, line_data):
-        """Add measured line data.
-
-        `line_data` is a dictionary mapping file names to dictionaries::
-
-            { filename: { lineno: None, ... }, ...}
-
-        """
-        if self._debug.should('dataop'):
-            self._debug.write("Adding lines: %d files, %d lines total" % (
-                len(line_data), sum(len(lines) for lines in line_data.values())
-            ))
-        if self._has_arcs():
-            raise CoverageException("Can't add lines to existing arc data")
-
-        if self._lines is None:
-            self._lines = {}
-        for filename, linenos in iitems(line_data):
-            if filename in self._lines:
-                new_linenos = set(self._lines[filename])
-                new_linenos.update(linenos)
-                linenos = new_linenos
-            self._lines[filename] = list(linenos)
-
-        self._validate()
-
-    def add_arcs(self, arc_data):
-        """Add measured arc data.
-
-        `arc_data` is a dictionary mapping file names to dictionaries::
-
-            { filename: { (l1,l2): None, ... }, ...}
-
-        """
-        if self._debug.should('dataop'):
-            self._debug.write("Adding arcs: %d files, %d arcs total" % (
-                len(arc_data), sum(len(arcs) for arcs in arc_data.values())
-            ))
-        if self._has_lines():
-            raise CoverageException("Can't add arcs to existing line data")
-
-        if self._arcs is None:
-            self._arcs = {}
-        for filename, arcs in iitems(arc_data):
-            if filename in self._arcs:
-                new_arcs = set(self._arcs[filename])
-                new_arcs.update(arcs)
-                arcs = new_arcs
-            self._arcs[filename] = list(arcs)
-
-        self._validate()
-
-    def add_file_tracers(self, file_tracers):
-        """Add per-file plugin information.
-
-        `file_tracers` is { filename: plugin_name, ... }
-
-        """
-        if self._debug.should('dataop'):
-            self._debug.write("Adding file tracers: %d files" % (len(file_tracers),))
-
-        existing_files = self._arcs or self._lines or {}
-        for filename, plugin_name in iitems(file_tracers):
-            if filename not in existing_files:
-                raise CoverageException(
-                    "Can't add file tracer data for unmeasured file '%s'" % (filename,)
-                )
-            existing_plugin = self._file_tracers.get(filename)
-            if existing_plugin is not None and plugin_name != existing_plugin:
-                raise CoverageException(
-                    "Conflicting file tracer name for '%s': %r vs %r" % (
-                        filename, existing_plugin, plugin_name,
-                    )
-                )
-            self._file_tracers[filename] = plugin_name
-
-        self._validate()
-
-    def add_run_info(self, **kwargs):
-        """Add information about the run.
-
-        Keywords are arbitrary, and are stored in the run dictionary. Values
-        must be JSON serializable. You may use this function more than once,
-        but repeated keywords overwrite each other.
-
-        """
-        if self._debug.should('dataop'):
-            self._debug.write("Adding run info: %r" % (kwargs,))
-        if not self._runs:
-            self._runs = [{}]
-        self._runs[0].update(kwargs)
-        self._validate()
-
-    def touch_file(self, filename, plugin_name=""):
-        """Ensure that `filename` appears in the data, empty if needed.
-
-        `plugin_name` is the name of the plugin responsible for this file. It is used
-        to associate the right filereporter, etc.
-        """
-        if self._debug.should('dataop'):
-            self._debug.write("Touching %r" % (filename,))
-        if not self._has_arcs() and not self._has_lines():
-            raise CoverageException("Can't touch files in an empty CoverageData")
-
-        if self._has_arcs():
-            where = self._arcs
-        else:
-            where = self._lines
-        where.setdefault(filename, [])
-        if plugin_name:
-            # Set the tracer for this file
-            self._file_tracers[filename] = plugin_name
-
-        self._validate()
-
-    def set_context(self, context):
-        """Set the context. Not implemented for JSON storage."""
-        if context:
-            raise CoverageException("JSON storage doesn't support contexts")
-
-    def write(self):
-        """Write the collected coverage data to a file.
-
-        `suffix` is a suffix to append to the base file name. This can be used
-        for multiple or parallel execution, so that many coverage data files
-        can exist simultaneously. A dot will be used to join the base name and
-        the suffix.
-
-        """
-        filename = self.filename
-        suffix = filename_suffix(self.suffix)
-        if suffix:
-            filename += "." + suffix
-        self._write_file(filename)
-
-    def _write_fileobj(self, file_obj):
-        """Write the coverage data to `file_obj`."""
-
-        # Create the file data.
-        file_data = {}
-
-        if self._has_arcs():
-            file_data['arcs'] = self._arcs
-
-        if self._has_lines():
-            file_data['lines'] = self._lines
-
-        if self._file_tracers:
-            file_data['file_tracers'] = self._file_tracers
-
-        if self._runs:
-            file_data['runs'] = self._runs
-
-        # Write the data to the file.
-        file_obj.write(self._GO_AWAY)
-        json.dump(file_data, file_obj, separators=(',', ':'))
-
-    def _write_file(self, filename):
-        """Write the coverage data to `filename`."""
-        if self._debug.should('dataio'):
-            self._debug.write("Writing data to %r" % (filename,))
-        with open(filename, 'w') as fdata:
-            self._write_fileobj(fdata)
-
-    def erase(self, parallel=False):
-        """Erase the data in this object.
-
-        If `parallel` is true, then also deletes data files created from the
-        basename by parallel-mode.
-
-        """
-        self._lines = None
-        self._arcs = None
-        self._file_tracers = {}
-        self._runs = []
-        self._validate()
-
-        if self._debug.should('dataio'):
-            self._debug.write("Erasing data file %r" % (self.filename,))
-        file_be_gone(self.filename)
-        if parallel:
-            data_dir, local = os.path.split(self.filename)
-            localdot = local + '.*'
-            pattern = os.path.join(os.path.abspath(data_dir), localdot)
-            for filename in glob.glob(pattern):
-                if self._debug.should('dataio'):
-                    self._debug.write("Erasing parallel data file %r" % (filename,))
-                file_be_gone(filename)
-
-    def update(self, other_data, aliases=None):
-        """Update this data with data from another `CoverageData`.
-
-        If `aliases` is provided, it's a `PathAliases` object that is used to
-        re-map paths to match the local machine's.
-
-        """
-        if self._has_lines() and other_data._has_arcs():
-            raise CoverageException("Can't combine arc data with line data")
-        if self._has_arcs() and other_data._has_lines():
-            raise CoverageException("Can't combine line data with arc data")
-
-        aliases = aliases or PathAliases()
-
-        # _file_tracers: only have a string, so they have to agree.
-        # Have to do these first, so that our examination of self._arcs and
-        # self._lines won't be confused by data updated from other_data.
-        for filename in other_data.measured_files():
-            other_plugin = other_data.file_tracer(filename)
-            filename = aliases.map(filename)
-            this_plugin = self.file_tracer(filename)
-            if this_plugin is None:
-                if other_plugin:
-                    self._file_tracers[filename] = other_plugin
-            elif this_plugin != other_plugin:
-                raise CoverageException(
-                    "Conflicting file tracer name for '%s': %r vs %r" % (
-                        filename, this_plugin, other_plugin,
-                    )
-                )
-
-        # _runs: add the new runs to these runs.
-        self._runs.extend(other_data._runs)
-
-        # _lines: merge dicts.
-        if other_data._has_lines():
-            if self._lines is None:
-                self._lines = {}
-            for filename, file_lines in iitems(other_data._lines):
-                filename = aliases.map(filename)
-                if filename in self._lines:
-                    lines = set(self._lines[filename])
-                    lines.update(file_lines)
-                    file_lines = list(lines)
-                self._lines[filename] = file_lines
-
-        # _arcs: merge dicts.
-        if other_data._has_arcs():
-            if self._arcs is None:
-                self._arcs = {}
-            for filename, file_arcs in iitems(other_data._arcs):
-                filename = aliases.map(filename)
-                if filename in self._arcs:
-                    arcs = set(self._arcs[filename])
-                    arcs.update(file_arcs)
-                    file_arcs = list(arcs)
-                self._arcs[filename] = file_arcs
-
-        self._validate()
-
-    ##
-    ## Miscellaneous
-    ##
-
-    def _validate(self):
-        """If we are in paranoid mode, validate that everything is right."""
-        if env.TESTING:
-            self._validate_invariants()
-
-    def _validate_invariants(self):
-        """Validate internal invariants."""
-        # Only one of _lines or _arcs should exist.
-        assert not(self._has_lines() and self._has_arcs()), (
-            "Shouldn't have both _lines and _arcs"
-        )
-
-        # _lines should be a dict of lists of ints.
-        if self._has_lines():
-            for fname, lines in iitems(self._lines):
-                assert isinstance(fname, string_class), "Key in _lines shouldn't be %r" % (fname,)
-                assert all(isinstance(x, int) for x in lines), (
-                    "_lines[%r] shouldn't be %r" % (fname, lines)
-                )
-
-        # _arcs should be a dict of lists of pairs of ints.
-        if self._has_arcs():
-            for fname, arcs in iitems(self._arcs):
-                assert isinstance(fname, string_class), "Key in _arcs shouldn't be %r" % (fname,)
-                assert all(isinstance(x, int) and isinstance(y, int) for x, y in arcs), (
-                    "_arcs[%r] shouldn't be %r" % (fname, arcs)
-                )
-
-        # _file_tracers should have only non-empty strings as values.
-        for fname, plugin in iitems(self._file_tracers):
-            assert isinstance(fname, string_class), (
-                "Key in _file_tracers shouldn't be %r" % (fname,)
-            )
-            assert plugin and isinstance(plugin, string_class), (
-                "_file_tracers[%r] shouldn't be %r" % (fname, plugin)
-            )
-
-        # _runs should be a list of dicts.
-        for val in self._runs:
-            assert isinstance(val, dict)
-            for key in val:
-                assert isinstance(key, string_class), "Key in _runs shouldn't be %r" % (key,)
-
-    ##
-    ## Internal
-    ##
-
-    def _has_lines(self):
-        """Do we have data in self._lines?"""
-        return self._lines is not None
-
-    def _has_arcs(self):
-        """Do we have data in self._arcs?"""
-        return self._arcs is not None
+"""
 
+import glob
+import os.path
 
-# $set_env.py: COVERAGE_STORAGE - The storage implementation to use: sql (default), or json.
-STORAGE = os.environ.get("COVERAGE_STORAGE", "sql")
-if STORAGE == "json":
-    CoverageData = CoverageJsonData
-elif STORAGE == "sql":
-    from coverage.sqldata import CoverageSqliteData
-    CoverageData = CoverageSqliteData
+from coverage.misc import CoverageException, file_be_gone
+from coverage.sqldata import CoverageData
 
 
 def line_counts(data, fullpath=False):
@@ -769,53 +122,3 @@ def combine_parallel_data(data, aliases=None, data_paths=None, strict=False):
 
     if strict and not files_combined:
         raise CoverageException("No usable data files")
-
-def canonicalize_json_data(data):
-    """Canonicalize our JSON data so it can be compared."""
-    for fname, lines in iitems(data.get('lines', {})):
-        data['lines'][fname] = sorted(lines)
-    for fname, arcs in iitems(data.get('arcs', {})):
-        data['arcs'][fname] = sorted(arcs)
-
-
-def pretty_data(data):
-    """Format data as JSON, but as nicely as possible.
-
-    Returns a string.
-
-    """
-    # Start with a basic JSON dump.
-    out = json.dumps(data, indent=4, sort_keys=True)
-    # But pairs of numbers shouldn't be split across lines...
-    out = re.sub(r"\[\s+(-?\d+),\s+(-?\d+)\s+]", r"[\1, \2]", out)
-    # Trailing spaces mess with tests, get rid of them.
-    out = re.sub(r"(?m)\s+$", "", out)
-    return out
-
-
-def debug_main(args):
-    """Dump the raw data from data files.
-
-    Run this as::
-
-        $ python -m coverage.data [FILE]
-
-    """
-    parser = optparse.OptionParser()
-    parser.add_option(
-        "-c", "--canonical", action="store_true",
-        help="Sort data into a canonical order",
-    )
-    options, args = parser.parse_args(args)
-
-    for filename in (args or [".coverage"]):
-        print("--- {0} ------------------------------".format(filename))
-        data = CoverageData._read_raw_data_file(filename)
-        if options.canonical:
-            canonicalize_json_data(data)
-        print(pretty_data(data))
-
-
-if __name__ == '__main__':
-    import sys
-    debug_main(sys.argv[1:])
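
The deleted reading path is small enough to sketch standalone: a 4.x data file
was the _GO_AWAY sentinel followed by one compact JSON object with "lines" or
"arcs" keys, plus optional "file_tracers" and "runs". A minimal sketch of that
now-removed reader; the ".coverage" file name is just an example path::

    import json

    # Sentinel copied from the deleted CoverageJsonData._GO_AWAY.
    GO_AWAY = "!coverage.py: This is a private format, don't read it directly!"

    def read_json_data_file(filename):
        """Read a 4.x-style JSON coverage data file, as the deleted code did."""
        with open(filename, "r") as f:
            # The file starts with the sentinel, then one JSON object.
            if f.read(len(GO_AWAY)) != GO_AWAY:
                raise ValueError("Doesn't seem to be a coverage.py data file")
            return json.load(f)

    data = read_json_data_file(".coverage")      # example path, 4.x-era file
    lines = data.get("lines", {})   # { "file1": [17, 23, 45], ... }
    arcs = data.get("arcs", {})     # { "file1": [[17, 23], [17, 25]], ... }, branch runs only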
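
The removed class also documents the format's key invariant: only one of
"lines" or "arcs" is stored, because line data can be recovered from arcs by
keeping the endpoints greater than zero (negative endpoints mark code-object
entries and exits). A self-contained sketch of that recovery, mirroring the
deleted lines() method::

    import itertools

    def lines_from_arcs(arcs):
        """Recover executed line numbers from arc pairs."""
        all_lines = itertools.chain.from_iterable(arcs)
        # Negative endpoints are code-object entries/exits, not real lines.
        return sorted(set(l for l in all_lines if l > 0))

    print(lines_from_arcs([(-1, 17), (17, 23), (17, 25), (25, 26), (26, -1)]))
    # [17, 23, 25, 26]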
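
What survives in coverage/data.py is the re-export plus the storage-agnostic
helpers (line_counts, combine_parallel_data). A sketch of the import path the
shim keeps working; the ".coverage" default and the read() call reflect the
5.x SQLite-backed API and are assumptions here, not shown in this diff::

    # Both names now refer to the same SQLite-backed class.
    from coverage.data import CoverageData, line_counts
    from coverage.sqldata import CoverageData as SqlCoverageData
    assert CoverageData is SqlCoverageData

    data = CoverageData()      # defaults to ".coverage" in the current directory
    data.read()                # load an existing data file
    print(line_counts(data))   # { filename: number of executed lines, ... }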