summary | refs | log | tree | commit | diff
path: root/lab/benchmark/benchmark.py
diff options
context:
space:
mode:
Diffstat (limited to 'lab/benchmark/benchmark.py')
-rw-r--r-- lab/benchmark/benchmark.py 584
1 files changed, 584 insertions, 0 deletions
diff --git a/lab/benchmark/benchmark.py b/lab/benchmark/benchmark.py
new file mode 100644
index 00000000..af0e33cf
--- /dev/null
+++ b/lab/benchmark/benchmark.py
@@ -0,0 +1,584 @@
+"""Run performance comparisons for versions of coverage"""
+
+import collections
+import contextlib
+import dataclasses
+import itertools
+import os
+import random
+import shutil
+import statistics
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple
+
+
class ShellSession:
    """A logged shell session.

    Use it as a context manager: entering opens the log file for appending
    and leaving closes it.  Commands run through the session have their
    command line, combined output, working directory and timing appended
    to the log.

    The duration of the last command is available as .last_duration.
    """

    def __init__(self, output_filename: str):
        self.output_filename = output_filename
        self.last_duration: float = 0
        # The open log file; only set while the session is entered.
        self.foutput = None

    def __enter__(self):
        self.foutput = open(self.output_filename, "a", encoding="utf-8")
        print(f"Logging output to {os.path.abspath(self.output_filename)}")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.foutput.close()

    def print(self, *args, **kwargs):
        """Print a message to this shell's log."""
        print(*args, **kwargs, file=self.foutput)

    def run_command(self, cmd: str) -> str:
        """
        Run a command line (with a shell).

        stderr is merged into stdout, and the elapsed time is stored in
        .last_duration.

        Returns:
            str: the output of the command, stripped of surrounding whitespace.

        Raises:
            Exception: if the command exits with a non-zero status.

        """
        self.print(f"\n========================\n$ {cmd}")
        start = time.perf_counter()
        proc = subprocess.run(
            cmd,
            shell=True,
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        # Commands may emit bytes that are not valid UTF-8.  Replace them
        # instead of raising, so the output is still logged and the exit
        # status is still checked.
        output = proc.stdout.decode("utf-8", errors="replace")
        self.last_duration = time.perf_counter() - start
        self.print(output, end="")
        self.print(f"(was: {cmd})")
        self.print(f"(in {os.getcwd()}, duration: {self.last_duration:.3f}s)")

        if proc.returncode != 0:
            self.print(f"ERROR: command returned {proc.returncode}")
            raise Exception(
                f"Command failed ({proc.returncode}): {cmd!r}, output was:\n{output}"
            )

        return output.strip()
+
+
def rmrf(path: Path) -> None:
    """
    Delete the directory tree rooted at `path`.

    A path that doesn't exist is silently ignored.
    """
    if not path.exists():
        return
    shutil.rmtree(path)
+
+
@contextlib.contextmanager
def change_dir(newdir: Path) -> Iterator[Path]:
    """
    Temporarily make `newdir` the current directory.

    The directory (and any missing parents) is created first.  The previous
    working directory is restored on exit, even if the body raises.
    """
    came_from = os.getcwd()
    newdir.mkdir(parents=True, exist_ok=True)
    os.chdir(newdir)
    try:
        yield newdir
    finally:
        os.chdir(came_from)
+
+
@contextlib.contextmanager
def file_replace(file_name: Path, old_text: str, new_text: str) -> Iterator[None]:
    """
    Replace some text in `file_name`, and change it back.

    Every occurrence of `old_text` is replaced by `new_text` for the duration
    of the `with` block, then the original file contents are written back.
    If `old_text` is empty, the file is not touched at all.

    Raises:
        Exception: if `old_text` is non-empty and not present in the file.
    """
    if not old_text:
        # Nothing to replace: don't read or write the file.
        yield
        return

    file_text = file_name.read_text()
    if old_text not in file_text:
        raise Exception(f"Old text {old_text!r} not found in {file_name}")
    file_name.write_text(file_text.replace(old_text, new_text))
    try:
        yield
    finally:
        # Restore the exact original contents, even if the body raised.
        file_name.write_text(file_text)
+
+
class ProjectToTest:
    """Information about a project to use as a test case.

    Subclasses override the hook methods below to say how to get the source,
    prepare an environment, and run the test suite with and without coverage.
    """

    # Where can we clone the project from?
    git_url: Optional[str] = None
    # Short word for messages, directories, etc.  If not set, it defaults to
    # the last component of git_url.
    slug: Optional[str] = None

    def __init__(self):
        if not self.slug and self.git_url:
            self.slug = self.git_url.split("/")[-1]

    def shell(self):
        """Return a ShellSession logging to output_<slug>.log."""
        return ShellSession(f"output_{self.slug}.log")

    def make_dir(self):
        """Set .dir to work_<slug>, removing any existing directory there."""
        self.dir = Path(f"work_{self.slug}")
        if self.dir.exists():
            rmrf(self.dir)

    def get_source(self, shell):
        """Get the source of the project."""
        shell.run_command(f"git clone {self.git_url} {self.dir}")

    def prep_environment(self, env):
        """Prepare the environment to run the test suite.

        This is not timed.
        """
        pass

    def tweak_coverage_settings(
        self, settings: Iterable[Tuple[str, Any]]
    ) -> "contextlib.AbstractContextManager[None]":
        """Tweak the coverage settings.

        Returns a context manager: the tweaks apply inside the `with` block.
        The base implementation changes nothing, but still returns a usable
        (no-op) context manager so callers can always write
        `with proj.tweak_coverage_settings(...)`.

        NOTE: This is not properly factored, and is only used by ToxProject now!!!
        """
        return contextlib.nullcontext()

    def run_no_coverage(self, env):
        """Run the test suite with no coverage measurement."""
        pass

    def run_with_coverage(self, env, pip_args, cov_tweaks):
        """Run the test suite with coverage measurement."""
        pass
+
+
class EmptyProject(ProjectToTest):
    """A dummy project for testing other parts of this code.

    Nothing is cloned or executed: each "run" just reports the next value
    from `fake_durations`, cycling forever.
    """

    def __init__(self, slug: str = "empty", fake_durations: Iterable[float] = (1.23,)):
        self.slug = slug
        self.durations = itertools.cycle(fake_durations)

    def get_source(self, shell):
        """There is no source to get."""
        pass

    def run_no_coverage(self, env):
        """Pretend to run the test suite with no coverage measurement.

        Returns a fake duration, so that a no-coverage run produces a number
        like every other run instead of None.
        """
        return next(self.durations)

    def run_with_coverage(self, env, pip_args, cov_tweaks):
        """Pretend to run the test suite with coverage measurement."""
        return next(self.durations)
+
+
class ToxProject(ProjectToTest):
    """A project using tox to run the test suite."""

    def prep_environment(self, env):
        """Install tox into the venv and build the tox environment (untimed)."""
        env.shell.run_command(f"{env.python} -m pip install 'tox<4'")
        self.run_tox(env, env.pyver.toxenv, "--notest")

    def run_tox(self, env, toxenv, toxargs=""):
        """Run a tox command. Return the duration."""
        env.shell.run_command(f"{env.python} -m tox -e {toxenv} {toxargs}")
        return env.shell.last_duration

    def pre_check(self, env):
        """Hook called just before the timed coverage run.

        Does nothing by default, so subclasses only define it if they need it.
        """

    def post_check(self, env):
        """Hook called just after the timed coverage run.

        Does nothing by default, so subclasses only define it if they need it.
        """

    def run_no_coverage(self, env):
        """Run the tox environment as-is. Return the duration."""
        return self.run_tox(env, env.pyver.toxenv, "--skip-pkg-install")

    def run_with_coverage(self, env, pip_args, cov_tweaks):
        """Install coverage with `pip_args` into the tox venv, then time the tests.

        Only the final tox run is timed; rebuilding the tox environment and
        installing coverage are not.
        """
        self.run_tox(env, env.pyver.toxenv, "--notest")
        env.shell.run_command(
            f".tox/{env.pyver.toxenv}/bin/python -m pip install {pip_args}"
        )
        with self.tweak_coverage_settings(cov_tweaks):
            self.pre_check(env)  # NOTE: Not properly factored, and only used from here.
            duration = self.run_tox(env, env.pyver.toxenv, "--skip-pkg-install")
            self.post_check(env)  # NOTE: Not properly factored, and only used from here.
        return duration
+
+
class ProjectPytestHtml(ToxProject):
    """pytest-dev/pytest-html"""

    git_url = "https://github.com/pytest-dev/pytest-html"

    def run_with_coverage(self, env, pip_args, cov_tweaks):
        """Not currently supported for this project: always raises.

        The previous implementation ran the "<toxenv>-cov" tox environment and
        spliced `cov_tweaks` as text into a "[run]" section of .coveragerc.
        As the error message records, that stopped working when the coverage
        options were changed to tweaks.  The code sat unreachable after the
        raise and has been removed.

        Raises:
            Exception: always.
        """
        raise Exception("This doesn't work because options changed to tweaks")
+
+
class ProjectDateutil(ToxProject):
    """dateutil/dateutil"""

    git_url = "https://github.com/dateutil/dateutil"

    def prep_environment(self, env):
        """Do the usual tox preparation, then run updatezinfo.py."""
        super().prep_environment(env)
        update_cmd = f"{env.python} updatezinfo.py"
        env.shell.run_command(update_cmd)

    def run_no_coverage(self, env):
        """Only log a note, and report a duration of zero."""
        note_cmd = "echo No option to run without coverage"
        env.shell.run_command(note_cmd)
        return 0
+
+
class ProjectAttrs(ToxProject):
    """python-attrs/attrs"""

    git_url = "https://github.com/python-attrs/attrs"

    def tweak_coverage_settings(
        self, tweaks: Iterable[Tuple[str, Any]]
    ) -> Iterator[None]:
        """Apply the tweaks to the coverage settings in pyproject.toml."""
        toml_name = "pyproject.toml"
        return tweak_toml_coverage_settings(toml_name, tweaks)

    def pre_check(self, env):
        """Log the contents of pyproject.toml."""
        env.shell.run_command("cat pyproject.toml")

    def post_check(self, env):
        """Log a listing of the current directory."""
        env.shell.run_command("ls -al")
+
+
def tweak_toml_coverage_settings(
    toml_file: str, tweaks: Iterable[Tuple[str, Any]]
) -> Iterator[None]:
    """Insert coverage settings into `toml_file`, to be undone afterwards.

    Each (name, value) tweak becomes a line placed directly after the
    "[tool.coverage.run]" header.  Only bool and str values can be written.
    With no tweaks, the file is left alone.

    Returns the `file_replace` context manager that makes and reverts the edit.

    Raises:
        Exception: if a tweak value is neither a bool nor a str.
    """

    def as_toml_line(name: str, value: Any) -> str:
        # bool is tested first and rendered lower-case, as TOML spells it.
        if isinstance(value, bool):
            return f"{name} = {str(value).lower()}"
        if isinstance(value, str):
            return f"{name} = '{value}'"
        raise Exception(f"Can't tweak toml setting: {name} = {value!r}")

    if not tweaks:
        return file_replace(Path(toml_file), "", "")

    section = "[tool.coverage.run]\n"
    setting_lines = [as_toml_line(name, value) for name, value in tweaks]
    replacement = section + "\n".join(setting_lines) + "\n"
    return file_replace(Path(toml_file), section, replacement)
+
+
class AdHocProject(ProjectToTest):
    """A standalone program to run locally."""

    def __init__(self, python_file, cur_dir=None, pip_args=None):
        """
        Args:
            python_file: the program to run; it must exist.
            cur_dir: the directory to run it in; defaults to the directory
                containing `python_file`.  It must exist.
            pip_args: arguments for "pip install ..." naming what the program
                needs, or None if nothing needs installing.

        Raises:
            ValueError: if `python_file` or the run directory doesn't exist.
        """
        super().__init__()
        self.python_file = Path(python_file)
        if not self.python_file.exists():
            raise ValueError(f"Couldn't find {self.python_file} to run ad-hoc.")
        self.cur_dir = Path(cur_dir or self.python_file.parent)
        if not self.cur_dir.exists():
            raise ValueError(f"Couldn't find {self.cur_dir} to run in.")
        self.pip_args = pip_args
        self.slug = self.python_file.name

    def get_source(self, shell):
        """The program is already on disk: nothing to fetch."""
        pass

    def prep_environment(self, env):
        """Install the program's requirements, if any were given."""
        # pip_args defaults to None; don't run a literal "pip install None".
        if self.pip_args:
            env.shell.run_command(f"{env.python} -m pip install {self.pip_args}")

    def run_no_coverage(self, env):
        """Run the program in cur_dir. Return the duration."""
        with change_dir(self.cur_dir):
            env.shell.run_command(f"{env.python} {self.python_file}")
        return env.shell.last_duration

    def run_with_coverage(self, env, pip_args, cov_tweaks):
        """Install coverage, then run the program under "coverage run".

        Only the program run is timed, not the install.  `cov_tweaks` is
        not used here.
        """
        env.shell.run_command(f"{env.python} -m pip install {pip_args}")
        with change_dir(self.cur_dir):
            env.shell.run_command(f"{env.python} -m coverage run {self.python_file}")
        return env.shell.last_duration
+
+
class SlipcoverBenchmark(AdHocProject):
    """
    Runs one of the programs from the Slipcover benchmarks.

    Expects a clone of https://github.com/plasma-umass/slipcover at
    /src/slipcover

    """

    def __init__(self, python_file):
        # `python_file` is a name relative to the clone's benchmarks directory.
        benchmark_path = f"/src/slipcover/benchmarks/{python_file}"
        super().__init__(
            benchmark_path,
            cur_dir="/src/slipcover",
            pip_args="six pyperf",
        )
+
+
class PyVersion:
    """A version of Python to use.

    This base class only declares the attributes; subclasses set them.
    """

    # The command to run this Python
    command: str
    # Short word for messages, directories, etc
    slug: str
    # The tox environment to run this Python, or None if there isn't one
    # (AdHocPython sets it to None).
    toxenv: Optional[str]
+
+
class Python(PyVersion):
    """A version of CPython to use."""

    def __init__(self, major, minor):
        # The executable name doubles as the slug, e.g. "python3.11".
        executable = f"python{major}.{minor}"
        self.command = executable
        self.slug = executable
        self.toxenv = f"py{major}{minor}"
+
+
class PyPy(PyVersion):
    """A version of PyPy to use."""

    def __init__(self, major, minor):
        # The executable name doubles as the slug, e.g. "pypy3.9".
        executable = f"pypy{major}.{minor}"
        self.command = executable
        self.slug = executable
        self.toxenv = f"pypy{major}{minor}"
+
+
class AdHocPython(PyVersion):
    """A custom build of Python to use."""

    def __init__(self, path, slug):
        # `path` is the install prefix: the interpreter is its bin/python3.
        self.slug = slug
        self.toxenv = None  # no tox environment for a custom build
        self.command = f"{path}/bin/python3"
+
+
@dataclasses.dataclass
class Coverage:
    """A version of coverage.py to use, maybe None.

    Leaving `pip_args` as None means "no coverage": the experiment then runs
    the project's test suite without coverage measurement.
    """

    # Short identifying word, used in messages, directory names, etc.
    slug: str
    # What to pass to "pip install ..." to get this version.
    pip_args: Optional[str] = None
    # (name, value) tweaks to apply to the coverage settings.
    tweaks: Optional[Iterable[Tuple[str, Any]]] = None
+
+
class CoveragePR(Coverage):
    """A version of coverage.py from a pull request."""

    def __init__(self, number, tweaks=None):
        # Install from the merge ref of pull request `number`.
        merge_ref = (
            f"git+https://github.com/nedbat/coveragepy.git@refs/pull/{number}/merge"
        )
        super().__init__(slug=f"#{number}", pip_args=merge_ref, tweaks=tweaks)
+
+
class CoverageCommit(Coverage):
    """A version of coverage.py from a specific commit."""

    def __init__(self, sha, tweaks=None):
        # The commit sha is both the slug and the git ref to install.
        commit_url = f"git+https://github.com/nedbat/coveragepy.git@{sha}"
        super().__init__(slug=sha, pip_args=commit_url, tweaks=tweaks)
+
+
class CoverageSource(Coverage):
    """The coverage.py in a working tree."""

    def __init__(self, directory, tweaks=None):
        # pip installs straight from `directory`; the slug is always "source".
        super().__init__(slug="source", pip_args=directory, tweaks=tweaks)
+
+
@dataclasses.dataclass
class Env:
    """An environment to run a test suite in."""

    # The Python version this environment was made for.
    pyver: PyVersion
    # The python executable inside the environment's venv.
    python: Path
    # The logged shell session used to run commands.
    shell: ShellSession
+
+
# A result is identified by three slugs: (project, Python version, coverage).
ResultKey = Tuple[str, str, str]

# The names of the result dimensions, in the same order as a ResultKey.
DIMENSION_NAMES = ["proj", "pyver", "cov"]
+
+
class Experiment:
    """A particular time experiment to run.

    Every combination of project, Python version and coverage version is
    timed `num_runs` times.  The median of each combination is kept in
    .result_data, keyed by the (project, pyver, cov) slugs.
    """

    def __init__(
        self,
        py_versions: List[PyVersion],
        cov_versions: List[Coverage],
        projects: List[ProjectToTest],
    ):
        self.py_versions = py_versions
        self.cov_versions = cov_versions
        self.projects = projects
        # Median duration in seconds for each (proj, pyver, cov) slug triple.
        self.result_data: Dict[ResultKey, float] = {}

    def run(self, num_runs: int = 3) -> None:
        """Prepare every project and environment, then do all the timed runs.

        Preparation (getting source, making venvs, prepping environments) is
        done once up front and is not timed.  The timed runs are then done in
        shuffled order.  Medians are printed and stored in .result_data.
        """
        all_runs = []

        for proj in self.projects:
            print(f"Prepping project {proj.slug}")
            with proj.shell() as shell:
                proj.make_dir()
                proj.get_source(shell)

                for pyver in self.py_versions:
                    print(f"Making venv for {proj.slug} {pyver.slug}")
                    venv_dir = f"venv_{proj.slug}_{pyver.slug}"
                    shell.run_command(f"{pyver.command} -m venv {venv_dir}")
                    python = Path.cwd() / f"{venv_dir}/bin/python"
                    shell.run_command(f"{python} -V")
                    env = Env(pyver, python, shell)

                    with change_dir(proj.dir):
                        print(f"Prepping for {proj.slug} {pyver.slug}")
                        proj.prep_environment(env)
                    for cov_ver in self.cov_versions:
                        all_runs.append((proj, pyver, cov_ver, env))

        # Repeat every combination num_runs times, in shuffled order.
        all_runs *= num_runs
        random.shuffle(all_runs)
        total_runs = len(all_runs)

        run_data: Dict[ResultKey, List[float]] = collections.defaultdict(list)

        for total_run_num, (proj, pyver, cov_ver, env) in enumerate(all_runs, start=1):
            print(
                "Running tests: "
                + f"{proj.slug}, {pyver.slug}, cov={cov_ver.slug}, "
                + f"{total_run_num} of {total_runs}"
            )
            # The shell session was closed after prepping: re-enter it to
            # reopen its log for this run.
            with env.shell:
                with change_dir(proj.dir):
                    if cov_ver.pip_args is None:
                        dur = proj.run_no_coverage(env)
                    else:
                        dur = proj.run_with_coverage(
                            env,
                            cov_ver.pip_args,
                            cov_ver.tweaks,
                        )
            print(f"Tests took {dur:.3f}s")
            result_key = (proj.slug, pyver.slug, cov_ver.slug)
            run_data[result_key].append(dur)

        # Summarize and collect the data.
        print("# Results")
        for proj in self.projects:
            for pyver in self.py_versions:
                for cov_ver in self.cov_versions:
                    result_key = (proj.slug, pyver.slug, cov_ver.slug)
                    med = statistics.median(run_data[result_key])
                    self.result_data[result_key] = med
                    print(
                        f"Median for {proj.slug}, {pyver.slug}, "
                        + f"cov={cov_ver.slug}: {med:.3f}s"
                    )

    def show_results(
        self,
        rows: List[str],
        column: str,
        ratios: Iterable[Tuple[str, str, str]] = (),
    ) -> None:
        """Print the medians as a Markdown-style table.

        Args:
            rows: the dimension names ("proj", "pyver", "cov") whose
                combinations make the table rows.
            column: the remaining dimension name; each of its slugs gets a
                column of times.
            ratios: (label, numerator slug, denominator slug) triples.  Each
                adds a column showing numerator time as a percentage of
                denominator time.  The slugs must be in the `column`
                dimension.
        """
        # ratios is used for the header and again for every row.
        ratios = list(ratios)

        dimensions = {
            "cov": [cov_ver.slug for cov_ver in self.cov_versions],
            "pyver": [pyver.slug for pyver in self.py_versions],
            "proj": [proj.slug for proj in self.projects],
        }

        table_axes = [dimensions[rowname] for rowname in rows]
        # Table cells are addressed as (*rows, column); result_data is keyed
        # in DIMENSION_NAMES order.  remap converts the former to the latter.
        data_order = [*rows, column]
        remap = [data_order.index(datum) for datum in DIMENSION_NAMES]

        WIDTH = 20

        def as_table_row(vals):
            return "| " + " | ".join(v.ljust(WIDTH) for v in vals) + " |"

        header = []
        header.extend(rows)
        header.extend(dimensions[column])
        header.extend(slug for slug, _, _ in ratios)

        print()
        print(as_table_row(header))
        dashes = [":---"] * len(rows) + ["---:"] * (len(header) - len(rows))
        print(as_table_row(dashes))
        for tup in itertools.product(*table_axes):
            row = []
            row.extend(tup)
            col_data = {}
            for col in dimensions[column]:
                key = (*tup, col)
                key = tuple(key[i] for i in remap)
                result_time = self.result_data[key]
                row.append(f"{result_time:.3f} s")
                col_data[col] = result_time
            for _, num, denom in ratios:
                if col_data[denom]:
                    ratio = col_data[num] / col_data[denom]
                    row.append(f"{ratio * 100:.2f}%")
                else:
                    # A zero duration (a project that can't run without
                    # coverage reports 0) has no meaningful ratio.
                    row.append("n/a")
            print(as_table_row(row))
+
+
# Scratch directory for experiments: run_experiment removes and re-makes it.
PERF_DIR = Path("/tmp") / "covperf"
+
+
def run_experiment(
    py_versions: List[PyVersion],
    cov_versions: List[Coverage],
    projects: List[ProjectToTest],
    rows: List[str],
    column: str,
    ratios: Iterable[Tuple[str, str, str]] = (),
    num_runs: Optional[int] = None,
):
    """Validate the arguments, then run an experiment and show its results.

    PERF_DIR is removed and re-made, and all the work happens inside it.

    Args:
        py_versions, cov_versions, projects: what to time.  All their slugs
            must be unique and contain no spaces.
        rows: the dimension names to use as table rows.
        column: the dimension name to use as table columns.  Together with
            `rows` it must cover exactly the names in DIMENSION_NAMES.
        ratios: (label, numerator slug, denominator slug) triples to add as
            ratio columns.
        num_runs: how many times to run each combination.  If None, it is
            read from the first command-line argument, or is 3 if there is
            no argument.

    Raises:
        Exception: if any of the validations fail.
    """
    # ratios is checked here and then iterated again when showing results.
    ratios = list(ratios)

    slugs = [v.slug for v in py_versions + cov_versions + projects]
    if len(set(slugs)) != len(slugs):
        raise Exception(f"Slugs must be unique: {slugs}")
    if any(" " in slug for slug in slugs):
        raise Exception(f"No spaces in slugs please: {slugs}")
    ratio_slugs = [rslug for ratio in ratios for rslug in ratio[1:]]
    if any(rslug not in slugs for rslug in ratio_slugs):
        raise Exception(f"Ratio slug doesn't match a slug: {ratio_slugs}, {slugs}")
    if set(rows + [column]) != set(DIMENSION_NAMES):
        raise Exception(
            f"All of these must be in rows or column: {', '.join(DIMENSION_NAMES)}"
        )

    if num_runs is None:
        # Keep the command-line behavior, but don't die with an IndexError
        # when no argument was given.
        num_runs = int(sys.argv[1]) if len(sys.argv) > 1 else 3

    print(f"Removing and re-making {PERF_DIR}")
    rmrf(PERF_DIR)

    with change_dir(PERF_DIR):
        exp = Experiment(
            py_versions=py_versions, cov_versions=cov_versions, projects=projects
        )
        exp.run(num_runs=num_runs)
        exp.show_results(rows=rows, column=column, ratios=ratios)