summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Ned Batchelder <ned@nedbatchelder.com>, 2022-11-07 16:11:26 -0500
committer: Ned Batchelder <ned@nedbatchelder.com>, 2022-11-08 06:36:47 -0500
commit: 09bc2d6ab0f951c58546dab234edeaa9de7d4c44 (patch)
tree: d53ba990b4a90d264a83ee11f89cef9800db3317 /tests
parent: bc630b58b5d1c58cc8108e584a480f3a1cd5ab70 (diff)
download: python-coveragepy-git-09bc2d6ab0f951c58546dab234edeaa9de7d4c44.tar.gz
perf: hash data files during combining to avoid unneeded work. #1483
When generating many parallel data files, often some data files will be exact copies of each other. Checking the hashes, we can avoid combining the duplicates, speeding the process. On a coverage.py metacov, we had 651 duplicates out of 2189 files (29%). The time to combine was reduced by 17%.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_api.py 2
-rw-r--r-- tests/test_concurrency.py 6
2 files changed, 6 insertions, 2 deletions
diff --git a/tests/test_api.py b/tests/test_api.py
index ce44b9b1..19545232 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1362,7 +1362,7 @@ class CombiningTest(CoverageTest):
# Make bogus data files.
self.make_file(".coverage.bad1", "This isn't a coverage data file.")
- self.make_file(".coverage.bad2", "This isn't a coverage data file.")
+ self.make_file(".coverage.bad2", "This isn't a coverage data file either.")
# Combine the parallel coverage data files into .coverage, but nothing is readable.
cov = coverage.Coverage()
diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py
index 0a51d4d9..2c827760 100644
--- a/tests/test_concurrency.py
+++ b/tests/test_concurrency.py
@@ -484,9 +484,13 @@ class MultiprocessingTest(CoverageTest):
out_lines = out.splitlines()
assert len(out_lines) == nprocs + 1
assert all(
- re.fullmatch(r"Combined data file \.coverage\..*\.\d+\.\d+", line)
+ re.fullmatch(
+ r"(Combined data file|Skipping duplicate data) \.coverage\..*\.\d+\.\d+",
+ line
+ )
for line in out_lines
)
+ assert len(glob.glob(".coverage.*")) == 0
out = self.run_command("coverage report -m")
last_line = self.squeezed_lines(out)[-1]