summary | refs | log | tree | commit | diff
path: root/coverage/data.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/data.py')
-rw-r--r-- coverage/data.py 71
1 files changed, 45 insertions, 26 deletions
diff --git a/coverage/data.py b/coverage/data.py
index 4bdfe301..798d167f 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -11,6 +11,7 @@ imports working.
"""
import glob
+import hashlib
import os.path
from coverage.exceptions import CoverageException, NoDataError
@@ -110,7 +111,9 @@ def combine_parallel_data(
if strict and not files_to_combine:
raise NoDataError("No data to combine")
- files_combined = 0
+ file_hashes = set()
+ combined_any = False
+
for f in files_to_combine:
if f == data.data_filename():
# Sometimes we are combining into a file which is one of the
@@ -118,34 +121,50 @@ def combine_parallel_data(
if data._debug.should('dataio'):
data._debug.write(f"Skipping combining ourself: {f!r}")
continue
- if data._debug.should('dataio'):
- data._debug.write(f"Combining data file {f!r}")
+
try:
- new_data = CoverageData(f, debug=data._debug)
- new_data.read()
- except CoverageException as exc:
- if data._warn:
- # The CoverageException has the file name in it, so just
- # use the message as the warning.
- data._warn(str(exc))
+ rel_file_name = os.path.relpath(f)
+ except ValueError:
+ # ValueError can be raised under Windows when os.getcwd() returns a
+ # folder from a different drive than the drive of f, in which case
+ # we print the original value of f instead of its relative path
+ rel_file_name = f
+
+ with open(f, "rb") as fobj:
+ hasher = hashlib.new("sha3_256")
+ hasher.update(fobj.read())
+ sha = hasher.digest()
+ combine_this_one = sha not in file_hashes
+
+ delete_this_one = not keep
+ if combine_this_one:
+ if data._debug.should('dataio'):
+ data._debug.write(f"Combining data file {f!r}")
+ file_hashes.add(sha)
+ try:
+ new_data = CoverageData(f, debug=data._debug)
+ new_data.read()
+ except CoverageException as exc:
+ if data._warn:
+ # The CoverageException has the file name in it, so just
+ # use the message as the warning.
+ data._warn(str(exc))
+ delete_this_one = False
+ else:
+ data.update(new_data, aliases=aliases)
+ combined_any = True
+ if message:
+ message(f"Combined data file {rel_file_name}")
else:
- data.update(new_data, aliases=aliases)
- files_combined += 1
if message:
- try:
- file_name = os.path.relpath(f)
- except ValueError:
- # ValueError can be raised under Windows when os.getcwd() returns a
- # folder from a different drive than the drive of f, in which case
- # we print the original value of f instead of its relative path
- file_name = f
- message(f"Combined data file {file_name}")
- if not keep:
- if data._debug.should('dataio'):
- data._debug.write(f"Deleting combined data file {f!r}")
- file_be_gone(f)
-
- if strict and not files_combined:
+ message(f"Skipping duplicate data {rel_file_name}")
+
+ if delete_this_one:
+ if data._debug.should('dataio'):
+ data._debug.write(f"Deleting data file {f!r}")
+ file_be_gone(f)
+
+ if strict and not combined_any:
raise NoDataError("No usable data files")