From 09bc2d6ab0f951c58546dab234edeaa9de7d4c44 Mon Sep 17 00:00:00 2001
From: Ned Batchelder
Date: Mon, 7 Nov 2022 16:11:26 -0500
Subject: perf: hash data files during combining to avoid unneeded work. #1483

When generating many parallel data files, often some data files will be
exact copies of each other. Checking the hashes, we can avoid combining
the duplicates, speeding the process.

On a coverage.py metacov, we had 651 duplicates out of 2189 files (29%).
The time to combine was reduced by 17%.
---
 coverage/sqldata.py | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'coverage/sqldata.py')

diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 2b773053..2fbc53f5 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -4,7 +4,6 @@
 """SQLite coverage data."""

 import collections
-import datetime
 import functools
 import glob
 import itertools
@@ -56,7 +55,6 @@ CREATE TABLE meta (
     -- 'has_arcs' boolean -- Is this data recording branches?
     -- 'sys_argv' text -- The coverage command line that recorded the data.
     -- 'version' text -- The version of coverage.py that made the file.
-    -- 'when' text -- Datetime when the file was created.
 );

 CREATE TABLE file (
@@ -305,7 +303,6 @@ class CoverageData(SimpleReprMixin):
                 [
                     ("sys_argv", str(getattr(sys, "argv", None))),
                     ("version", __version__),
-                    ("when", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
                 ]
             )

--
cgit v1.2.1
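
A rough sketch of the idea described in the commit message (not coverage.py's actual
combining code; the function and variable names below are illustrative assumptions):
duplicate data files can be skipped by hashing their bytes before merging. Removing the
'when' timestamp in the diff above plausibly supports this, since otherwise two
identical recordings would differ only by their creation time and never hash as equal.

import hashlib
from pathlib import Path
from typing import Callable, Iterable

def combine_data_files(paths: Iterable[str], merge_one: Callable[[str], None]) -> None:
    """Merge each data file, skipping files whose exact contents were already merged."""
    seen_hashes = set()
    for path in paths:
        # Hash the file's raw bytes; identical recordings produce identical digests.
        digest = hashlib.sha256(Path(path).read_bytes()).digest()
        if digest in seen_hashes:
            continue  # an identical file has already been combined, skip the work
        seen_hashes.add(digest)
        merge_one(path)  # merge this file's data into the combined result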