path: root/tests/test_concurrency.py
author    Ned Batchelder <ned@nedbatchelder.com>  2022-11-07 16:11:26 -0500
committer Ned Batchelder <ned@nedbatchelder.com>  2022-11-08 06:36:47 -0500
commit    09bc2d6ab0f951c58546dab234edeaa9de7d4c44 (patch)
tree      d53ba990b4a90d264a83ee11f89cef9800db3317 /tests/test_concurrency.py
parent    bc630b58b5d1c58cc8108e584a480f3a1cd5ab70 (diff)
download  python-coveragepy-git-09bc2d6ab0f951c58546dab234edeaa9de7d4c44.tar.gz
perf: hash data files during combining to avoid unneeded work. #1483
When generating many parallel data files, some of them are often exact copies of each other. By checking file hashes, we can avoid combining the duplicates, speeding up the process. On a coverage.py metacov run, 651 of 2189 data files (29%) were duplicates, and the time to combine was reduced by 17%.
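A minimal sketch of the idea (illustrative names only, not coverage.py's actual combine code): hash each parallel data file before merging, and skip any file whose contents have already been seen. Duplicates then cost one cheap hash pass instead of a full parse-and-merge, so the savings grow with the duplicate ratio.

import glob
import hashlib

def combine_parallel_files(pattern=".coverage.*"):
    """Combine data files matching pattern, skipping byte-for-byte duplicates."""
    seen_hashes = set()
    combined, skipped = 0, 0
    for path in sorted(glob.glob(pattern)):
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        if digest in seen_hashes:
            # An identical file was already combined; merging it again would
            # add no new data, so skip the expensive combine step.
            print(f"Skipping duplicate data {path}")
            skipped += 1
            continue
        seen_hashes.add(digest)
        # ... merge this file's coverage data into the combined result ...
        print(f"Combined data file {path}")
        combined += 1
    return combined, skipped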
Diffstat (limited to 'tests/test_concurrency.py')
-rw-r--r--  tests/test_concurrency.py  |  6
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py
index 0a51d4d9..2c827760 100644
--- a/tests/test_concurrency.py
+++ b/tests/test_concurrency.py
@@ -484,9 +484,13 @@ class MultiprocessingTest(CoverageTest):
out_lines = out.splitlines()
assert len(out_lines) == nprocs + 1
assert all(
- re.fullmatch(r"Combined data file \.coverage\..*\.\d+\.\d+", line)
+ re.fullmatch(
+ r"(Combined data file|Skipping duplicate data) \.coverage\..*\.\d+\.\d+",
+ line
+ )
for line in out_lines
)
+ assert len(glob.glob(".coverage.*")) == 0
out = self.run_command("coverage report -m")
last_line = self.squeezed_lines(out)[-1]