summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lab/benchmark.py300
-rw-r--r--perf/bug397.py54
-rw-r--r--perf/perf_measure.py188
-rw-r--r--perf/solve_poly.py246
4 files changed, 300 insertions, 488 deletions
diff --git a/lab/benchmark.py b/lab/benchmark.py
new file mode 100644
index 00000000..27d05423
--- /dev/null
+++ b/lab/benchmark.py
@@ -0,0 +1,300 @@
+"""Run performance comparisons for versions of coverage"""
+
+import contextlib
+import dataclasses
+import os
+import shutil
+import statistics
+import subprocess
+import time
+from pathlib import Path
+
+from typing import Iterator, List, Optional, Tuple, Union
+
+
+class ShellSession:
+ """A logged shell session.
+
+ The duration of the last command is available as .last_duration.
+ """
+
+ def __init__(self, output_filename: str):
+ self.output_filename = output_filename
+ self.last_duration: float = 0
+
+ def __enter__(self):
+ self.foutput = open(self.output_filename, "a", encoding="utf-8")
+ print(f"Logging output to {os.path.abspath(self.output_filename)}")
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.foutput.close()
+
+ def print(self, *args, **kwargs):
+ print(*args, **kwargs, file=self.foutput)
+
+ def run_command(self, cmd: str) -> str:
+ """
+ Run a command line (with a shell).
+
+ Returns:
+ str: the output of the command.
+
+ """
+ self.print(f"\n========================\n$ {cmd}")
+ start = time.perf_counter()
+ proc = subprocess.run(
+ cmd,
+ shell=True,
+ check=False,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ )
+ output = proc.stdout.decode("utf-8")
+ self.last_duration = time.perf_counter() - start
+ self.print(output, end="")
+ self.print(f"(was: {cmd})")
+ self.print(f"(in {os.getcwd()}, duration: {self.last_duration:.3f}s)")
+
+ if proc.returncode != 0:
+ self.print(f"ERROR: command returned {proc.returncode}")
+ raise Exception(
+ f"Command failed ({proc.returncode}): {cmd!r}, output was:\n{output}"
+ )
+
+ return output.strip()
+
+
+def rmrf(path: Path) -> None:
+ """
+ Remove a directory tree. It's OK if it doesn't exist.
+ """
+ if path.exists():
+ shutil.rmtree(path)
+
+
+@contextlib.contextmanager
+def change_dir(newdir: Path) -> Iterator[Path]:
+ """
+ Change to a new directory, and then change back.
+
+ Will make the directory if needed.
+ """
+ old_dir = os.getcwd()
+ newdir.mkdir(parents=True, exist_ok=True)
+ os.chdir(newdir)
+ try:
+ yield newdir
+ finally:
+ os.chdir(old_dir)
+
+
+@contextlib.contextmanager
+def file_replace(file_name: Path, old_text: str, new_text: str) -> Iterator[None]:
+ """
+ Replace some text in `file_name`, and change it back.
+ """
+ if old_text:
+ file_text = file_name.read_text()
+ if old_text not in file_text:
+            raise Exception(f"Old text {old_text!r} not found in {file_name}")
+ updated_text = file_text.replace(old_text, new_text)
+ file_name.write_text(updated_text)
+ try:
+ yield
+ finally:
+ if old_text:
+ file_name.write_text(file_text)
+
+
+class ProjectToTest:
+ """Information about a project to use as a test case."""
+
+ # Where can we clone the project from?
+ git_url: Optional[str] = None
+
+ def __init__(self):
+ self.slug = self.git_url.split("/")[-1]
+ self.dir = Path(self.slug)
+
+ def get_source(self, shell):
+ """Get the source of the project."""
+ if self.dir.exists():
+ rmrf(self.dir)
+ shell.run_command(f"git clone {self.git_url}")
+
+ def prep_environment(self, env):
+ """Prepare the environment to run the test suite.
+
+ This is not timed.
+ """
+ pass
+
+ def run_no_coverage(self, env):
+ """Run the test suite with no coverage measurement."""
+ pass
+
+ def run_with_coverage(self, env, pip_args, cov_options):
+ """Run the test suite with coverage measurement."""
+ pass
+
+
+class ToxProject(ProjectToTest):
+ """A project using tox to run the test suite."""
+
+ def prep_environment(self, env):
+ env.shell.run_command(f"{env.python} -m pip install tox")
+ self.run_tox(env, env.pyver.toxenv, "--notest")
+
+ def run_tox(self, env, toxenv, toxargs=""):
+ env.shell.run_command(f"{env.python} -m tox -e {toxenv} {toxargs}")
+ return env.shell.last_duration
+
+ def run_no_coverage(self, env):
+ return self.run_tox(env, env.pyver.toxenv, "--skip-pkg-install")
+
+
+class PytestHtml(ToxProject):
+ """pytest-dev/pytest-html"""
+
+ git_url = "https://github.com/pytest-dev/pytest-html"
+
+ def run_with_coverage(self, env, pip_args, cov_options):
+ covenv = env.pyver.toxenv + "-cov"
+ self.run_tox(env, covenv, "--notest")
+ env.shell.run_command(f".tox/{covenv}/bin/python -m pip install {pip_args}")
+ if cov_options:
+ replace = ("# reference: https", f"[run]\n{cov_options}\n#")
+ else:
+ replace = ("", "")
+ with file_replace(Path(".coveragerc"), *replace):
+ env.shell.run_command("cat .coveragerc")
+ env.shell.run_command(f".tox/{covenv}/bin/python -m coverage debug sys")
+ return self.run_tox(env, covenv, "--skip-pkg-install")
+
+
+class PyVersion:
+ # The command to run this Python
+ command: str
+ # The tox environment to run this Python
+ toxenv: str
+
+
+class Python(PyVersion):
+ """A version of CPython to use."""
+
+ def __init__(self, major, minor):
+ self.command = f"python{major}.{minor}"
+ self.toxenv = f"py{major}{minor}"
+
+
+class PyPy(PyVersion):
+ """A version of PyPy to use."""
+
+ def __init__(self, major, minor):
+ self.command = f"pypy{major}.{minor}"
+ self.toxenv = f"pypy{major}{minor}"
+
+
+@dataclasses.dataclass
+class Env:
+ """An environment to run a test suite in."""
+
+ pyver: PyVersion
+ python: Path
+ shell: ShellSession
+
+
+def run_experiments(
+ py_versions: List[PyVersion],
+ cov_versions: List[Tuple[str, Optional[str], Optional[str]]],
+ projects: List[ProjectToTest],
+ num_runs=3,
+):
+ """Run test suites under different conditions."""
+
+ for proj in projects:
+ print(f"Testing with {proj.git_url}")
+ with ShellSession(f"output_{proj.slug}.log") as shell:
+ proj.get_source(shell)
+
+ for pyver in py_versions:
+ print(f"Making venv for {proj.slug} {pyver.command}")
+ venv_dir = f"venv_{proj.slug}_{pyver.command}"
+ shell.run_command(f"{pyver.command} -m venv {venv_dir}")
+ python = Path.cwd() / f"{venv_dir}/bin/python"
+ shell.run_command(f"{python} -V")
+ env = Env(pyver, python, shell)
+
+ with change_dir(Path(proj.slug)):
+ print(f"Prepping for {proj.slug} {pyver.command}")
+ proj.prep_environment(env)
+ for cov_slug, cov_pip, cov_options in cov_versions:
+ durations = []
+ for run_num in range(num_runs):
+ print(
+ f"Running tests, cov={cov_slug}, {run_num+1} of {num_runs}"
+ )
+ if cov_pip is None:
+ dur = proj.run_no_coverage(env)
+ else:
+ dur = proj.run_with_coverage(env, cov_pip, cov_options)
+ print(f"Tests took {dur:.3f}s")
+ durations.append(dur)
+ med = statistics.median(durations)
+ print(
+ f"## Median for {pyver.command}, cov={cov_slug}: {med:.3f}s"
+ )
+
+
+PERF_DIR = Path("/tmp/covperf")
+
+
+print(f"Removing and re-making {PERF_DIR}")
+rmrf(PERF_DIR)
+
+with change_dir(PERF_DIR):
+
+ run_experiments(
+ py_versions=[
+ Python(3, 10),
+ ],
+ cov_versions=[
+ ("none", None, None),
+ ("6.4", "coverage==6.4", ""),
+ ("6.4 timid", "coverage==6.4", "timid=True"),
+ (
+ "PR 1381",
+ "git+https://github.com/cfbolz/coveragepy.git@f_trace_lines",
+ "",
+ ),
+ (
+ "PR 1381 timid",
+ "git+https://github.com/cfbolz/coveragepy.git@f_trace_lines",
+ "timid=True",
+ ),
+ ],
+ projects=[
+ PytestHtml(),
+ ],
+ num_runs=3,
+ )
+
+ run_experiments(
+ py_versions=[
+ PyPy(3, 9),
+ ],
+ cov_versions=[
+ ("none", None, None),
+ ("6.4", "coverage==6.4", ""),
+ (
+ "PR 1381",
+ "git+https://github.com/cfbolz/coveragepy.git@f_trace_lines",
+ "",
+ ),
+ ],
+ projects=[
+ PytestHtml(),
+ ],
+ num_runs=3,
+ )
diff --git a/perf/bug397.py b/perf/bug397.py
deleted file mode 100644
index 18c979b8..00000000
--- a/perf/bug397.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-"""
-Run this file two ways under coverage and see that the times are the same:
-
- $ coverage run lab/bug397.py slow
- Runtime per example: 130.96 +/- 3.70 us
- $ coverage run lab/bug397.py fast
- Runtime per example: 131.34 +/- 4.48 us
-
-Written by David MacIver as part of https://github.com/nedbat/coveragepy/issues/397
-
-"""
-
-import sys
-import random
-import time
-import math
-
-if sys.argv[1] == "slow":
- sys.settrace(sys.gettrace())
-
-random.seed(1)
-
-
-def hash_str(s):
- h = 0
- for c in s:
- h = (h * 31 + ord(c)) & (2 ** 64 - 1)
- return h
-
-data = [
- hex(random.getrandbits(1024)) for _ in range(500)
-]
-
-N_SAMPLES = 100
-
-
-def mean(xs):
- xs = list(xs)
- return sum(xs) / len(xs)
-
-
-def sd(xs):
- return math.sqrt(mean(x ** 2 for x in xs) - mean(xs) ** 2)
-
-
-if __name__ == '__main__':
- timing = []
- for _ in range(N_SAMPLES):
- start = time.time()
- for d in data:
- hash_str(d)
- timing.append(1000000 * (time.time() - start) / len(data))
- print("Runtime per example:", f"{mean(timing):.2f} +/- {sd(timing):.2f} us")
diff --git a/perf/perf_measure.py b/perf/perf_measure.py
deleted file mode 100644
index e8f9ea98..00000000
--- a/perf/perf_measure.py
+++ /dev/null
@@ -1,188 +0,0 @@
-# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
-# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
-
-# Run like this:
-# .tox/py36/bin/python perf/perf_measure.py
-
-from collections import namedtuple
-import os
-import statistics
-import sys
-import tempfile
-import time
-
-from unittest_mixins.mixins import make_file
-
-import coverage
-from coverage.misc import import_local_file
-
-from tests.helpers import SuperModuleCleaner
-
-
-class StressResult(namedtuple('StressResult', ['files', 'calls', 'lines', 'baseline', 'covered'])):
- @property
- def overhead(self):
- return self.covered - self.baseline
-
-
-TEST_FILE = """\
-def parent(call_count, line_count):
- for _ in range(call_count):
- child(line_count)
-
-def child(line_count):
- for i in range(line_count):
- x = 1
-"""
-
-def mk_main(file_count, call_count, line_count):
- lines = []
- lines.extend(
- f"import test{idx}" for idx in range(file_count)
- )
- lines.extend(
- f"test{idx}.parent({call_count}, {line_count})" for idx in range(file_count)
- )
- return "\n".join(lines)
-
-
-class StressTest:
-
- def __init__(self):
- self.module_cleaner = SuperModuleCleaner()
-
- def _run_scenario(self, file_count, call_count, line_count):
- self.module_cleaner.clean_local_file_imports()
-
- for idx in range(file_count):
- make_file(f'test{idx}.py', TEST_FILE)
- make_file('testmain.py', mk_main(file_count, call_count, line_count))
-
- # Run it once just to get the disk caches loaded up.
- import_local_file("testmain")
- self.module_cleaner.clean_local_file_imports()
-
- # Run it to get the baseline time.
- start = time.perf_counter()
- import_local_file("testmain")
- baseline = time.perf_counter() - start
- self.module_cleaner.clean_local_file_imports()
-
- # Run it to get the covered time.
- start = time.perf_counter()
- cov = coverage.Coverage()
- cov.start()
- try: # pragma: nested
- # Import the Python file, executing it.
- import_local_file("testmain")
- finally: # pragma: nested
- # Stop coverage.py.
- covered = time.perf_counter() - start
- stats = cov._collector.tracers[0].get_stats()
- if stats:
- stats = stats.copy()
- cov.stop()
-
- return baseline, covered, stats
-
- def _compute_overhead(self, file_count, call_count, line_count):
- baseline, covered, stats = self._run_scenario(file_count, call_count, line_count)
-
- #print("baseline = {:.2f}, covered = {:.2f}".format(baseline, covered))
- # Empirically determined to produce the same numbers as the collected
- # stats from get_stats(), with Python 3.6.
- actual_file_count = 17 + file_count
- actual_call_count = file_count * call_count + 156 * file_count + 85
- actual_line_count = (
- 2 * file_count * call_count * line_count +
- 3 * file_count * call_count +
- 769 * file_count +
- 345
- )
-
- if stats is not None:
- assert actual_file_count == stats['files']
- assert actual_call_count == stats['calls']
- assert actual_line_count == stats['lines']
- print("File counts", file_count, actual_file_count, stats['files'])
- print("Call counts", call_count, actual_call_count, stats['calls'])
- print("Line counts", line_count, actual_line_count, stats['lines'])
- print()
-
- return StressResult(
- actual_file_count,
- actual_call_count,
- actual_line_count,
- baseline,
- covered,
- )
-
- fixed = 200
- numlo = 100
- numhi = 100
- step = 50
- runs = 5
-
- def count_operations(self):
-
- def operations(thing):
- for _ in range(self.runs):
- for n in range(self.numlo, self.numhi+1, self.step):
- kwargs = {
- "file_count": self.fixed,
- "call_count": self.fixed,
- "line_count": self.fixed,
- }
- kwargs[thing+"_count"] = n
- yield kwargs['file_count'] * kwargs['call_count'] * kwargs['line_count']
-
- ops = sum(sum(operations(thing)) for thing in ["file", "call", "line"])
- print(f"{ops/1e6:.1f}M operations")
-
- def check_coefficients(self):
- # For checking the calculation of actual stats:
- for f in range(1, 6):
- for c in range(1, 6):
- for l in range(1, 6):
- _, _, stats = self._run_scenario(f, c, l)
- print("{0},{1},{2},{3[files]},{3[calls]},{3[lines]}".format(f, c, l, stats))
-
- def stress_test(self):
- # For checking the overhead for each component:
- def time_thing(thing):
- per_thing = []
- pct_thing = []
- for _ in range(self.runs):
- for n in range(self.numlo, self.numhi+1, self.step):
- kwargs = {
- "file_count": self.fixed,
- "call_count": self.fixed,
- "line_count": self.fixed,
- }
- kwargs[thing+"_count"] = n
- res = self._compute_overhead(**kwargs)
- per_thing.append(res.overhead / getattr(res, f"{thing}s"))
- pct_thing.append(res.covered / res.baseline * 100)
-
- out = f"Per {thing}: "
- out += "mean = {:9.3f}us, stddev = {:8.3f}us, ".format(
- statistics.mean(per_thing)*1e6, statistics.stdev(per_thing)*1e6
- )
- out += f"min = {min(per_thing)*1e6:9.3f}us, "
- out += "pct = {:6.1f}%, stddev = {:6.1f}%".format(
- statistics.mean(pct_thing), statistics.stdev(pct_thing)
- )
- print(out)
-
- time_thing("file")
- time_thing("call")
- time_thing("line")
-
-
-if __name__ == '__main__':
- with tempfile.TemporaryDirectory(prefix="coverage_stress_") as tempdir:
- print(f"Working in {tempdir}")
- os.chdir(tempdir)
- sys.path.insert(0, ".")
-
- StressTest().stress_test()
diff --git a/perf/solve_poly.py b/perf/solve_poly.py
deleted file mode 100644
index 083dc544..00000000
--- a/perf/solve_poly.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
-# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
-
-# Given empirical data from perf_measure.py, calculate the coefficients of the
-# polynomials for file, call, and line operation counts.
-#
-# Written by Kyle Altendorf.
-
-import attr
-import itertools
-import numpy
-import scipy.optimize
-
-
-def f(*args, simplify=False):
- p = ((),)
- for l in range(len(args)):
- l += 1
- p = itertools.chain(p, itertools.product(*(args,), repeat=l))
-
- if simplify:
- p = {tuple(sorted(set(x))) for x in p}
- p = sorted(p, key=lambda x: (len(x), x))
-
- return p
-
-def m(*args):
- if len(args) == 0:
- return 0
-
- r = 1
- for arg in args:
- r *= arg
-
- return r
-
-
-class Poly:
- def __init__(self, *names):
- self.names = names
-
- self.terms = f(*self.names, simplify=True)
-
- def calculate(self, coefficients, **name_values):
- for name in name_values:
- if name not in self.names:
- raise Exception('bad parameter')
-
- substituted_terms = []
- for term in self.terms:
- substituted_terms.append(tuple(name_values[name] for name in term))
-
- c_tuples = ((c,) for c in coefficients)
-
- terms = tuple(a + b for a, b in zip(c_tuples, substituted_terms))
-
- multiplied = tuple(m(*t) for t in terms)
- total = sum(multiplied)
-
- return total
-
-
-poly = Poly('f', 'c', 'l')
-
-#print('\n'.join(str(t) for t in poly.terms))
-
-@attr.s
-class FCL:
- f = attr.ib()
- c = attr.ib()
- l = attr.ib()
-
-INPUT = """\
-1,1,1,18,242,1119
-1,1,2,18,242,1121
-1,1,3,18,242,1123
-1,1,4,18,242,1125
-1,1,5,18,242,1127
-1,2,1,18,243,1124
-1,2,2,18,243,1128
-1,2,3,18,243,1132
-1,2,4,18,243,1136
-1,2,5,18,243,1140
-1,3,1,18,244,1129
-1,3,2,18,244,1135
-1,3,3,18,244,1141
-1,3,4,18,244,1147
-1,3,5,18,244,1153
-1,4,1,18,245,1134
-1,4,2,18,245,1142
-1,4,3,18,245,1150
-1,4,4,18,245,1158
-1,4,5,18,245,1166
-1,5,1,18,246,1139
-1,5,2,18,246,1149
-1,5,3,18,246,1159
-1,5,4,18,246,1169
-1,5,5,18,246,1179
-2,1,1,19,399,1893
-2,1,2,19,399,1897
-2,1,3,19,399,1901
-2,1,4,19,399,1905
-2,1,5,19,399,1909
-2,2,1,19,401,1903
-2,2,2,19,401,1911
-2,2,3,19,401,1919
-2,2,4,19,401,1927
-2,2,5,19,401,1935
-2,3,1,19,403,1913
-2,3,2,19,403,1925
-2,3,3,19,403,1937
-2,3,4,19,403,1949
-2,3,5,19,403,1961
-2,4,1,19,405,1923
-2,4,2,19,405,1939
-2,4,3,19,405,1955
-2,4,4,19,405,1971
-2,4,5,19,405,1987
-2,5,1,19,407,1933
-2,5,2,19,407,1953
-2,5,3,19,407,1973
-2,5,4,19,407,1993
-2,5,5,19,407,2013
-3,1,1,20,556,2667
-3,1,2,20,556,2673
-3,1,3,20,556,2679
-3,1,4,20,556,2685
-3,1,5,20,556,2691
-3,2,1,20,559,2682
-3,2,2,20,559,2694
-3,2,3,20,559,2706
-3,2,4,20,559,2718
-3,2,5,20,559,2730
-3,3,1,20,562,2697
-3,3,2,20,562,2715
-3,3,3,20,562,2733
-3,3,4,20,562,2751
-3,3,5,20,562,2769
-3,4,1,20,565,2712
-3,4,2,20,565,2736
-3,4,3,20,565,2760
-3,4,4,20,565,2784
-3,4,5,20,565,2808
-3,5,1,20,568,2727
-3,5,2,20,568,2757
-3,5,3,20,568,2787
-3,5,4,20,568,2817
-3,5,5,20,568,2847
-4,1,1,21,713,3441
-4,1,2,21,713,3449
-4,1,3,21,713,3457
-4,1,4,21,713,3465
-4,1,5,21,713,3473
-4,2,1,21,717,3461
-4,2,2,21,717,3477
-4,2,3,21,717,3493
-4,2,4,21,717,3509
-4,2,5,21,717,3525
-4,3,1,21,721,3481
-4,3,2,21,721,3505
-4,3,3,21,721,3529
-4,3,4,21,721,3553
-4,3,5,21,721,3577
-4,4,1,21,725,3501
-4,4,2,21,725,3533
-4,4,3,21,725,3565
-4,4,4,21,725,3597
-4,4,5,21,725,3629
-4,5,1,21,729,3521
-4,5,2,21,729,3561
-4,5,3,21,729,3601
-4,5,4,21,729,3641
-4,5,5,21,729,3681
-5,1,1,22,870,4215
-5,1,2,22,870,4225
-5,1,3,22,870,4235
-5,1,4,22,870,4245
-5,1,5,22,870,4255
-5,2,1,22,875,4240
-5,2,2,22,875,4260
-5,2,3,22,875,4280
-5,2,4,22,875,4300
-5,2,5,22,875,4320
-5,3,1,22,880,4265
-5,3,2,22,880,4295
-5,3,3,22,880,4325
-5,3,4,22,880,4355
-5,3,5,22,880,4385
-5,4,1,22,885,4290
-5,4,2,22,885,4330
-5,4,3,22,885,4370
-5,4,4,22,885,4410
-5,4,5,22,885,4450
-5,5,1,22,890,4315
-5,5,2,22,890,4365
-5,5,3,22,890,4415
-5,5,4,22,890,4465
-5,5,5,22,890,4515
-"""
-
-inputs_outputs = {}
-for row in INPUT.splitlines():
- row = [int(v) for v in row.split(",")]
- inputs_outputs[FCL(*row[:3])] = FCL(*row[3:])
-
-#print('\n'.join(str(t) for t in inputs_outputs.items()))
-
-def calc_poly_coeff(poly, coefficients):
- c_tuples = list((c,) for c in coefficients)
- poly = list(f(*poly))
- poly = list(a + b for a, b in zip(c_tuples, poly))
- multiplied = list(m(*t) for t in poly)
- total = sum(multiplied)
- return total
-
-def calc_error(inputs, output, coefficients):
- result = poly.calculate(coefficients, **inputs)
- return result - output
-
-
-def calc_total_error(inputs_outputs, coefficients, name):
- total_error = 0
- for inputs, outputs in inputs_outputs.items():
- total_error += abs(calc_error(attr.asdict(inputs), attr.asdict(outputs)[name], coefficients))
-
- return total_error
-
-coefficient_count = len(poly.terms)
-#print('count: {}'.format(coefficient_count))
-x0 = numpy.array((0,) * coefficient_count)
-
-#print(x0)
-
-with open('results', 'w') as f:
- for name in sorted(attr.asdict(FCL(0,0,0))):
- c = scipy.optimize.minimize(
- fun=lambda c: calc_total_error(inputs_outputs, c, name),
- x0=x0
- )
-
- coefficients = [int(round(x)) for x in c.x]
- terms = [''.join(t) for t in poly.terms]
- message = f"{name}' = "
- message += ' + '.join("{}{}".format(coeff if coeff != 1 else '', term) for coeff, term in reversed(list(zip(coefficients, terms))) if coeff != 0)
- print(message)
- f.write(message)