From d2dd609c2b5004382acd0bcbee3ac27452e0894d Mon Sep 17 00:00:00 2001
From: not-matthias <matthias@codspeed.io>
Date: Tue, 2 Jun 2026 11:38:35 +0200
Subject: [PATCH] refactor(bench): migrate benchmarks to codspeed exec CLI

Replace the pytest-codspeed harness (bench.py) with a small generator
(generate_config.py) that emits a codspeed.yml of exec-harness targets,
run via `codspeed run`. Each target's name is kept byte-identical to the
old pytest test id (test_valgrind[<version>, <cmd>, <config>]) so the
dashboard names are unchanged.

The exec-harness reports the URI as `exec_harness::<name>` (prefix is
hardcoded in the runner) instead of `bench.py::<name>`; the platform keys
history on the full URI, so this starts a fresh history series with the
same display names.

CI now uses a matrix over the Valgrind version only (one job per version)
and generates that version's config from `valgrind --version`. Per-version
jobs preserve failure isolation: the exec-harness aborts the whole batch
on a single non-zero command, so each version runs in its own job and
`fail-fast: false` keeps one version's failure from cancelling the others.
The step-level `continue-on-error` on the benchmark step is removed.
---
 .github/workflows/codspeed.yml |  23 ++--
 .gitignore                     |   4 +
 bench/bench.py                 | 198 ---------------------------------
 bench/generate_config.py       | 141 +++++++++++++++++++++++
 bench/pytest.ini               |   2 -
 5 files changed, 157 insertions(+), 211 deletions(-)
 delete mode 100755 bench/bench.py
 create mode 100755 bench/generate_config.py
 delete mode 100644 bench/pytest.ini
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
index 928a8e613..48907fc02 100644
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@@ -10,16 +10,10 @@ on:
 jobs:
   benchmarks:
     runs-on: codspeed-macro
-    timeout-minutes: 20
+    timeout-minutes: 30
     strategy:
+      fail-fast: false
       matrix:
-        # IMPORTANT: The binary has to match the architecture of the runner!
-        cmd:
-          - testdata/take_strings-aarch64 varbinview_non_null
-          - echo Hello, World!
-          - python3 testdata/test.py
-          - stress-ng --cpu 1 --cpu-ops 10
-          - stress-ng --cpu 4 --cpu-ops 10
         valgrind:
           - "3.26.0"
           - "3.25.1"
@@ -89,10 +83,17 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
 
+      # Generate the codspeed.yml for this Valgrind version. The script derives
+      # the version label from `valgrind --version`, so each matrix job emits its
+      # own config (e.g. valgrind.codspeed / valgrind-3.26.0 / valgrind-3.25.1).
+      - name: Generate CodSpeed config
+        working-directory: bench
+        run: ./generate_config.py --valgrind /usr/local/bin/valgrind --output codspeed.yml
+
       - name: Run the benchmarks
         uses: CodSpeedHQ/action@main
-        continue-on-error: ${{ matrix.valgrind != 'local' }}
+        env:
+          CODSPEED_WALLTIME_PROFILER: samply
         with:
-          working-directory: bench
           mode: walltime
-          run: ./bench.py --cmd "${{ matrix.cmd }}" --valgrind-path /usr/local/bin/valgrind
+          config: bench/codspeed.yml
diff --git a/.gitignore b/.gitignore
index ea71bb0aa..4144ddc92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2550,3 +2550,7 @@ none/tests/freebsd/bug499212
 *.vgtest*.log
 /test-suite-overall.log
 test-suite.log
+
+# /bench (generated benchmark config)
+/bench/codspeed.yml
+/bench/codspeed.yaml
diff --git a/bench/bench.py b/bench/bench.py
deleted file mode 100755
index 6f525d435..000000000
--- a/bench/bench.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env -S uv run --script
-# /// script
-# requires-python = ">=3.9"
-# dependencies = [
-#     "pytest>=8.4.2",
-#     "pytest-codspeed>=4.2.0",
-# ]
-# ///
-
-import argparse
-import shlex
-import subprocess
-
-import pytest
-
-
-class ValgrindRunner:
-    """Run Valgrind with different configurations."""
-
-    def __init__(
-        self,
-        cmd: str,
-        valgrind_path: str = "valgrind",
-    ):
-        """Initialize valgrind runner.
-
-        Args:
-            cmd: Command to profile (can be a path or arbitrary shell command)
-            valgrind_path: Path to valgrind executable
-        """
-        self.cmd = cmd
-        self.valgrind_path = valgrind_path
-
-        # Verify valgrind is available
-        result = subprocess.run(
-            [self.valgrind_path, "--version"],
-            capture_output=True,
-            text=True,
-        )
-        if result.returncode != 0:
-            raise RuntimeError(f"Valgrind not found at: {self.valgrind_path}")
-        self.valgrind_version = result.stdout.strip()
-
-    def run_valgrind(self, *args: str) -> None:
-        """Execute valgrind with given arguments.
-
-        Args:
-            *args: Valgrind arguments
-        """
-
-        cmd = [
-            self.valgrind_path,
-            "--tool=callgrind",
-            "--log-file=/dev/null",
-            *args,
-            *shlex.split(self.cmd),
-        ]
-
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-        )
-        if result.returncode != 0:
-            raise RuntimeError(
-                f"Valgrind execution failed with code {result.returncode}\n"
-                f"Stdout:\n{result.stdout}\n"
-                f"Stderr:\n{result.stderr}"
-            )
-
-
-@pytest.fixture
-def runner(request):
-    """Fixture to provide runner instance to tests."""
-    return request.config._valgrind_runner
-
-
-def pytest_generate_tests(metafunc):
-    """Parametrize tests with valgrind configurations."""
-    if "valgrind_args" in metafunc.fixturenames:
-        runner = getattr(metafunc.config, "_valgrind_runner", None)
-        if not runner:
-            return
-
-        # Define valgrind configurations
-        configs = [
-            (["--read-inline-info=no"], "no-inline"),
-            (["--read-inline-info=yes"], "inline"),
-            (
-                [
-                    "--trace-children=yes",
-                    "--cache-sim=yes",
-                    "--I1=32768,8,64",
-                    "--D1=32768,8,64",
-                    "--LL=8388608,16,64",
-                    "--collect-systime=nsec",
-                    "--compress-strings=no",
-                    "--combine-dumps=yes",
-                    "--dump-line=no",
-                    "--read-inline-info=yes",
-                ],
-                "full-with-inline",
-            ),
-            (
-                [
-                    "--trace-children=yes",
-                    "--cache-sim=yes",
-                    "--I1=32768,8,64",
-                    "--D1=32768,8,64",
-                    "--LL=8388608,16,64",
-                    "--collect-systime=nsec",
-                    "--compress-strings=no",
-                    "--combine-dumps=yes",
-                    "--dump-line=no",
-                ],
-                "full-no-inline",
-            ),
-        ]
-
-        # If the valgrind version is from CodSpeed, we don't want to display the exact version
-        # to allow comparison against older versions. 
-        if "codspeed" in runner.valgrind_version:
-            runner.valgrind_version = "valgrind.codspeed"
-
-        # Create test IDs with format: valgrind-version, command, config-name
-        test_ids = [
-            f"{runner.valgrind_version}, {runner.cmd}, {config_name}"
-            for _, config_name in configs
-        ]
-
-        # Parametrize with just the args
-        metafunc.parametrize(
-            "valgrind_args",
-            [args for args, _ in configs],
-            ids=test_ids,
-        )
-
-
-@pytest.mark.benchmark
-def test_valgrind(runner, valgrind_args):
-    if runner:
-        runner.run_valgrind(*valgrind_args)
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Benchmark Valgrind with pytest-codspeed",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  # Run with a binary path
-  uv run bench.py --cmd /path/to/binary
-
-  # Run with an arbitrary command
-  uv run bench.py --cmd 'echo "hello world"'
-
-  # Run with custom valgrind installation
-  uv run bench.py --cmd /usr/bin/ls --valgrind-path /usr/local/bin/valgrind
-        """,
-    )
-
-    parser.add_argument(
-        "--cmd",
-        type=str,
-        required=True,
-        help="Command to profile (can be a path to a binary or any arbitrary command)",
-    )
-    parser.add_argument(
-        "--valgrind-path",
-        type=str,
-        default="valgrind",
-        help="Path to valgrind executable (default: valgrind)",
-    )
-    args = parser.parse_args()
-
-    # Create runner instance
-    runner = ValgrindRunner(
-        cmd=args.cmd,
-        valgrind_path=args.valgrind_path,
-    )
-    print(f"Valgrind version: {runner.valgrind_version}")
-    print(f"Command: {args.cmd}")
-
-    # Plugin to pass runner to tests
-    class RunnerPlugin:
-        def pytest_configure(self, config):
-            config._valgrind_runner = runner
-
-    exit_code = pytest.main(
-        [__file__, "-v", "--codspeed", "--codspeed-warmup-time=0", "--codspeed-max-time=5"],
-        plugins=[RunnerPlugin()],
-    )
-    if exit_code != 0 and exit_code != 5:
-        print(f"Benchmark execution returned exit code: {exit_code}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/bench/generate_config.py b/bench/generate_config.py
new file mode 100755
index 000000000..761386c0b
--- /dev/null
+++ b/bench/generate_config.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.9"
+# dependencies = []
+# ///
+"""Generate a `codspeed.yml` that benchmarks Valgrind via `codspeed exec`.
+
+Usage:
+  ./generate_config.py --valgrind /usr/local/bin/valgrind --output codspeed.yml
+
+Then run the benchmarks with:
+  codspeed run --config codspeed.yml --mode walltime
+"""
+
+import argparse
+import json
+import subprocess
+import sys
+
+# Commands to profile. NOTE: the take_strings-aarch64 binary must match the runner's architecture.
+COMMANDS = [
+    "testdata/take_strings-aarch64 varbinview_non_null",
+    "echo Hello, World!",
+    "python3 testdata/test.py",
+    "stress-ng --cpu 1 --cpu-ops 10",
+    "stress-ng --cpu 4 --cpu-ops 10",
+]
+
+# Callgrind configurations as (extra args, config name); the name is the last segment of
+# the id `test_valgrind[<version>, <cmd>, <name>]`. `full-*` differ only by --read-inline-info=yes.
+CONFIGS = [
+    (["--read-inline-info=no"], "no-inline"),
+    (["--read-inline-info=yes"], "inline"),
+    (
+        [
+            "--trace-children=yes",
+            "--cache-sim=yes",
+            "--I1=32768,8,64",
+            "--D1=32768,8,64",
+            "--LL=8388608,16,64",
+            "--collect-systime=nsec",
+            "--compress-strings=no",
+            "--combine-dumps=yes",
+            "--dump-line=no",
+            "--read-inline-info=yes",
+        ],
+        "full-with-inline",
+    ),
+    (
+        [
+            "--trace-children=yes",
+            "--cache-sim=yes",
+            "--I1=32768,8,64",
+            "--D1=32768,8,64",
+            "--LL=8388608,16,64",
+            "--collect-systime=nsec",
+            "--compress-strings=no",
+            "--combine-dumps=yes",
+            "--dump-line=no",
+        ],
+        "full-no-inline",
+    ),
+]
+
+
+def valgrind_version(valgrind_path: str) -> str:
+    """Return the normalized version label used in benchmark ids.
+
+    CodSpeed builds collapse to a single `valgrind.codspeed` label so they stay
+    comparable across iterations; upstream builds keep their reported version
+    string (e.g. `valgrind-3.26.0`). Raises RuntimeError when the binary cannot
+    be started or exits non-zero.
+    """
+    try:
+        result = subprocess.run([valgrind_path, "--version"], capture_output=True, text=True)
+    except OSError as err:  # e.g. missing or non-executable binary
+        raise RuntimeError(f"Valgrind not found or failed at: {valgrind_path}") from err
+    if result.returncode != 0:
+        raise RuntimeError(f"Valgrind not found or failed at: {valgrind_path}")
+
+    version = result.stdout.strip()
+    if "codspeed" in version:
+        return "valgrind.codspeed"
+    return version
+
+
+def build_config(valgrind_paths: list) -> dict:
+    """Assemble the codspeed.yml document: one target per (build, command, config)."""
+    targets = []
+    for binary in valgrind_paths:
+        label = valgrind_version(binary)  # resolved once per build
+        base = [binary, "--tool=callgrind", "--log-file=/dev/null"]
+        targets += [
+            {
+                "name": f"test_valgrind[{label}, {cmd}, {cfg}]",
+                "exec": " ".join([*base, *extra, cmd]),
+            }
+            for cmd in COMMANDS
+            for extra, cfg in CONFIGS
+        ]
+
+    return {"benchmarks": targets}
+
+
+def main():
+    """Parse the CLI, build the benchmark config, and write it to `--output`."""
+    cli = argparse.ArgumentParser(
+        description="Generate a codspeed.yml that benchmarks Valgrind via codspeed exec",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    cli.add_argument(
+        "--valgrind",
+        dest="valgrinds",
+        action="append",
+        required=True,
+        metavar="PATH",
+        help="Path to a valgrind executable (repeat for each build to benchmark). "
+        "The version label is derived from `<path> --version`.",
+    )
+    cli.add_argument(
+        "--output",
+        type=str,
+        default="codspeed.yml",
+        help="Path to write the generated config (default: codspeed.yml)",
+    )
+    opts = cli.parse_args()
+    document = build_config(opts.valgrinds)
+
+    # The document is serialized as JSON (with a trailing newline) into the .yml path.
+    with open(opts.output, "w") as out:
+        out.write(json.dumps(document, indent=2) + "\n")
+
+    print(
+        f"Wrote {opts.output} with {len(document['benchmarks'])} benchmarks "
+        f"({len(opts.valgrinds)} valgrind builds x {len(COMMANDS)} commands x {len(CONFIGS)} configs)",
+        file=sys.stderr,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bench/pytest.ini b/bench/pytest.ini
deleted file mode 100644
index 1afcedaf6..000000000
--- a/bench/pytest.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-[pytest]
-norecursedirs = testdata __pycache__ .pytest_cache *.egg-info