author    Ned Batchelder <ned@nedbatchelder.com>  2022-11-07 16:11:26 -0500
committer Ned Batchelder <ned@nedbatchelder.com>  2022-11-08 06:36:47 -0500
commit    09bc2d6ab0f951c58546dab234edeaa9de7d4c44 (patch)
tree      d53ba990b4a90d264a83ee11f89cef9800db3317 /tests/test_concurrency.py
parent    bc630b58b5d1c58cc8108e584a480f3a1cd5ab70 (diff)
download  python-coveragepy-git-09bc2d6ab0f951c58546dab234edeaa9de7d4c44.tar.gz
perf: hash data files during combining to avoid unneeded work. #1483
When generating many parallel data files, some of the files are often exact
copies of each other. By checking their hashes, we can avoid combining the
duplicates, speeding up the process.
On a coverage.py metacov, we had 651 duplicates out of 2189 files (29%).
The time to combine was reduced by 17%.
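As an illustration of the idea, here is a minimal sketch of hash-based
deduplication, not coverage.py's actual implementation; the helper name
files_to_combine is hypothetical. Each data file is hashed, and only the
first file seen for each digest is yielded for combining:

    import hashlib
    from pathlib import Path

    def files_to_combine(paths):
        """Yield one representative of each set of byte-identical data files."""
        seen = set()
        for path in paths:
            # Hash the file contents; identical files produce identical digests.
            digest = hashlib.sha256(Path(path).read_bytes()).hexdigest()
            if digest in seen:
                print(f"Skipping duplicate data {path}")
                continue
            seen.add(digest)
            yield path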
Diffstat (limited to 'tests/test_concurrency.py')
-rw-r--r--  tests/test_concurrency.py  6
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py
index 0a51d4d9..2c827760 100644
--- a/tests/test_concurrency.py
+++ b/tests/test_concurrency.py
@@ -484,9 +484,13 @@ class MultiprocessingTest(CoverageTest):
         out_lines = out.splitlines()
         assert len(out_lines) == nprocs + 1
         assert all(
-            re.fullmatch(r"Combined data file \.coverage\..*\.\d+\.\d+", line)
+            re.fullmatch(
+                r"(Combined data file|Skipping duplicate data) \.coverage\..*\.\d+\.\d+",
+                line
+            )
             for line in out_lines
         )
+        assert len(glob.glob(".coverage.*")) == 0
         out = self.run_command("coverage report -m")
         last_line = self.squeezed_lines(out)[-1]
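For reference, a standalone check of what the widened pattern accepts; the
sample lines below are hypothetical, shaped like the combiner's output:

    import re

    PATTERN = r"(Combined data file|Skipping duplicate data) \.coverage\..*\.\d+\.\d+"

    # Hypothetical sample lines in the shape the test expects:
    # <message> .coverage.<anything>.<digits>.<digits>
    samples = [
        "Combined data file .coverage.myhost.12345.678901",
        "Skipping duplicate data .coverage.myhost.12346.678902",
    ]
    assert all(re.fullmatch(PATTERN, line) for line in samples)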