CodSpeedHQ · not-matthias · Jun 3, 2026 · Jun 3, 2026 · Jun 2, 2026 · greptile-apps
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -87,30 +87,3 @@ jobs:
         env:
           CODSPEED_SKIP_UPLOAD: true
           CODSPEED_DEBUG: true
-
-  walltime-macos-test:
-    runs-on: macos-latest
-    steps:
-      - uses: "actions/checkout@v4"
-        with:
-          fetch-depth: 0
-          submodules: true
-      - uses: pnpm/action-setup@v2
-      - uses: actions/setup-node@v3
-        with:
-          cache: pnpm
-          node-version-file: .nvmrc
-      - name: Restore turbo cache
-        uses: ./.github/actions/turbo-cache
-      - run: pnpm install --frozen-lockfile --prefer-offline
-      - run: pnpm turbo run build
-
-      - name: Run benchmarks
-        uses: CodSpeedHQ/action@main
-        env:
-          CODSPEED_SKIP_UPLOAD: "true"
-          # Samply fails to profile pnpm targets for now
-          CODSPEED_PROFILER_ENABLED: "false"
-        with:
-          run: pnpm turbo run bench --filter=@codspeed/vitest-plugin
-          mode: walltime
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
@@ -91,6 +91,34 @@ jobs:
             pnpm --workspace-concurrency 1 -r bench-tinybench
             pnpm --workspace-concurrency 1 -r bench-vitest
 
+  codspeed-walltime-macos:
+    runs-on: macos-latest
+    steps:
+      - uses: "actions/checkout@v4"
+        with:
+          fetch-depth: 0
+          submodules: true
+      - uses: pnpm/action-setup@v2
+      - uses: actions/setup-node@v3
+        with:
+          cache: pnpm
+          node-version-file: .nvmrc
+      - name: Restore turbo cache
+        uses: ./.github/actions/turbo-cache
+      - run: pnpm install --frozen-lockfile --prefer-offline
+      - run: pnpm turbo run build
+
+      - name: Run macOS-only benchmarks
+        uses: CodSpeedHQ/action@main
+        with:
+          working-directory: packages/vitest-plugin
+          # Only run the macOS-only bench file: the rest of the suite already
+          # runs on the linux walltime job, and uploading the same benchmark
+          # twice for one commit is not supported.
+          run: pnpm turbo run bench --env-mode=loose --filter=@codspeed/vitest-plugin -- macos
+          mode: walltime
+          runner-version: latest
+
   electron-e2e:
     name: Run electron inbox e2e
     runs-on: codspeed-macro

diff --git a/packages/benchmark.js-plugin/src/index.ts b/packages/benchmark.js-plugin/src/index.ts
@@ -9,6 +9,8 @@ import {
   SetupInstrumentsResponse,
   teardownCore,
   tryIntrospect,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
 } from "@codspeed/core";
 import Benchmark from "benchmark";
 import buildSuiteAdd from "./buildSuiteAdd";
@@ -195,7 +197,7 @@ async function runBenchmarks({
       await optimizeFunction(benchPayload);
       await mongoMeasurement.start(uri);
       global.gc?.();
-      await (async function __codspeed_root_frame__() {
+      await wrapWithRootFrame(async () => {
         InstrumentHooks.startBenchmark();
         await benchPayload();
         InstrumentHooks.stopBenchmark();
@@ -205,7 +207,7 @@ async function runBenchmarks({
     } else {
       optimizeFunctionSync(benchPayload);
       await mongoMeasurement.start(uri);
-      (function __codspeed_root_frame__() {
+      wrapWithRootFrameSync(() => {
         InstrumentHooks.startBenchmark();
         benchPayload();
         InstrumentHooks.stopBenchmark();

diff --git a/packages/benchmark.js-plugin/tests/index.integ.test.ts b/packages/benchmark.js-plugin/tests/index.integ.test.ts
@@ -11,6 +11,8 @@ jest.mock("@codspeed/core", () => {
   const actual = jest.requireActual("@codspeed/core");
   mockCore.getGitDir = actual.getGitDir;
   mockCore.getCallingFile = actual.getCallingFile;
+  mockCore.wrapWithRootFrame = actual.wrapWithRootFrame;
+  mockCore.wrapWithRootFrameSync = actual.wrapWithRootFrameSync;
   return mockCore;
 });
 

diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -77,6 +77,7 @@ export type {
 } from "./generated/openapi";
 export { getV8Flags, tryIntrospect } from "./introspection";
 export { optimizeFunction, optimizeFunctionSync } from "./optimization";
+export { wrapWithRootFrame, wrapWithRootFrameSync } from "./rootFrame";
 export * from "./utils";
 export * from "./walltime";
 export type { InstrumentMode };

diff --git a/packages/core/src/native_core/instruments/hooks b/packages/core/src/native_core/instruments/hooks
diff --git a/packages/core/src/rootFrame.ts b/packages/core/src/rootFrame.ts
@@ -0,0 +1,19 @@
+/**
+ * Wrap a benchmark function so it executes under a frame named
+ * `__codspeed_root_frame__`. CodSpeed uses this frame to locate the
+ * benchmark root in collected call stacks; samples without it cannot be
+ * attributed to a benchmark.
+ */
+export function wrapWithRootFrame<T>(
+  fn: () => T | Promise<T>,
+): () => Promise<T> {
+  return async function __codspeed_root_frame__() {
+    return await fn();
+  };
+}
+
+export function wrapWithRootFrameSync<T>(fn: () => T): () => T {
+  return function __codspeed_root_frame__() {
+    return fn();
+  };
+}
diff --git a/packages/tinybench-plugin/src/analysis.ts b/packages/tinybench-plugin/src/analysis.ts
@@ -3,6 +3,8 @@ import {
   InstrumentHooks,
   mongoMeasurement,
   optimizeFunction,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
 } from "@codspeed/core";
 import { Bench, Fn, FnOptions, Task } from "tinybench";
 import { BaseBenchRunner } from "./shared";
@@ -25,18 +27,6 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     return InstrumentHooks.isInstrumented() ? "Measured" : "Checked";
   }
 
-  private wrapFunctionWithFrame(fn: Fn, isAsync: boolean): Fn {
-    if (isAsync) {
-      return async function __codspeed_root_frame__() {
-        await fn();
-      };
-    } else {
-      return function __codspeed_root_frame__() {
-        fn();
-      };
-    }
-  }
-
   protected async runTaskAsync(task: Task, uri: string): Promise<void> {
     const { fnOpts, fn } = task as unknown as { fnOpts?: FnOptions; fn: Fn };
 
@@ -50,10 +40,7 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     await mongoMeasurement.start(uri);
 
     global.gc?.();
-    await this.wrapWithInstrumentHooksAsync(
-      this.wrapFunctionWithFrame(fn, true),
-      uri,
-    );
+    await this.wrapWithInstrumentHooksAsync(wrapWithRootFrame(fn), uri);
 
     await mongoMeasurement.stop(uri);
     await fnOpts?.afterEach?.call(task, "run");
@@ -68,7 +55,7 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     fnOpts?.beforeAll?.call(task, "run");
     fnOpts?.beforeEach?.call(task, "run");
 
-    this.wrapWithInstrumentHooks(this.wrapFunctionWithFrame(fn, false), uri);
+    this.wrapWithInstrumentHooks(wrapWithRootFrameSync(fn), uri);
 
     fnOpts?.afterEach?.call(task, "run");
     fnOpts?.afterAll?.call(task, "run");

diff --git a/packages/tinybench-plugin/src/shared.ts b/packages/tinybench-plugin/src/shared.ts
@@ -1,4 +1,11 @@
-import { InstrumentHooks, setupCore, teardownCore } from "@codspeed/core";
+import {
+  getInstrumentMode,
+  InstrumentHooks,
+  MARKER_TYPE_BENCHMARK_END,
+  MARKER_TYPE_BENCHMARK_START,
+  setupCore,
+  teardownCore,
+} from "@codspeed/core";
 import { Bench, Fn, Task } from "tinybench";
 import { getTaskUri } from "./uri";
 
@@ -40,21 +47,31 @@ export abstract class BaseBenchRunner {
 
   protected wrapWithInstrumentHooks<T>(fn: () => T, uri: string): T {
     InstrumentHooks.startBenchmark();
-    const result = fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+      this.sendBenchmarkMarkers(runStart, runEnd);
+    }
-    try {
-      return fn();
-    } finally {
-      const runEnd = InstrumentHooks.currentTimestamp();
-      InstrumentHooks.stopBenchmark();
-      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-      this.sendBenchmarkMarkers(runStart, runEnd);
-    }
+    try {
+      return fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
-    try {
-      return fn();
-    } finally {
-      const runEnd = InstrumentHooks.currentTimestamp();
-      InstrumentHooks.stopBenchmark();
-      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-      this.sendBenchmarkMarkers(runStart, runEnd);
-    }
+    try {
+      return fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
   }
 
   protected async wrapWithInstrumentHooksAsync(
     fn: Fn,
     uri: string,
   ): Promise<unknown> {
     InstrumentHooks.startBenchmark();
-    const result = await fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return await fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+      this.sendBenchmarkMarkers(runStart, runEnd);
+    }
   }
 
   protected abstract getModeName(): string;
@@ -63,6 +80,23 @@ export abstract class BaseBenchRunner {
   protected abstract finalizeAsyncRun(): Task[];
   protected abstract finalizeSyncRun(): Task[];
 
+  // Benchmark markers bracket a single benchmark and must sit inside the sample
+  // window opened by startBenchmark(), so they are emitted per task before
+  // stopBenchmark() closes it. The runner consumes the FIFO stream in order:
+  // a marker sent after StopBenchmark falls outside the sample and breaks the
+  // expected SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
+  private sendBenchmarkMarkers(runStart: bigint, runEnd: bigint): void {
+    if (getInstrumentMode() !== "walltime") {
+      return;
+    }
+    InstrumentHooks.addMarker(
+      process.pid,
+      MARKER_TYPE_BENCHMARK_START,
+      runStart,
+    );
+    InstrumentHooks.addMarker(process.pid, MARKER_TYPE_BENCHMARK_END, runEnd);
+  }
+
   public setupBenchMethods(): void {
     this.bench.run = async () => {
       this.setupBenchRun();

diff --git a/packages/tinybench-plugin/src/walltime.ts b/packages/tinybench-plugin/src/walltime.ts
@@ -3,6 +3,8 @@ import {
   mongoMeasurement,
   msToNs,
   msToS,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
   writeWalltimeResults,
   type BenchmarkStats,
   type Benchmark as CodspeedBenchmark,
@@ -64,21 +66,10 @@ class WalltimeBenchRunner extends BaseBenchRunner {
 
   private wrapTaskFunction(task: Task, isAsync: boolean): void {
     const { fn } = task as unknown as { fn: Fn };
-    if (isAsync) {
-      // eslint-disable-next-line no-inner-declarations
-      async function __codspeed_root_frame__() {
-        await fn();
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (task as any).fn = __codspeed_root_frame__;
-    } else {
-      // eslint-disable-next-line no-inner-declarations
-      function __codspeed_root_frame__() {
-        fn();
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (task as any).fn = __codspeed_root_frame__;
-    }
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (task as any).fn = isAsync
+      ? wrapWithRootFrame(fn)
+      : wrapWithRootFrameSync(fn);
   }
 
   private registerCodspeedBenchmarkFromTask(task: Task): void {

diff --git a/packages/tinybench-plugin/tests/index.integ.test.ts b/packages/tinybench-plugin/tests/index.integ.test.ts
@@ -16,6 +16,8 @@
       startBenchmark: vi.fn(),
       stopBenchmark: vi.fn(),
       setExecutedBenchmark: vi.fn(),
+      currentTimestamp: vi.fn().mockReturnValue(0n),
+      addMarker: vi.fn(),
     },
     optimizeFunction: vi
       .fn()
@@ -24,6 +26,7 @@
       }),
     setupCore: vi.fn(),
     teardownCore: vi.fn(),
+    writeWalltimeResults: vi.fn(),
   };
 });
 
@@ -205,6 +208,39 @@
     expect(afterAll).toHaveBeenCalledTimes(2);
   });
 
+  it("emits benchmark markers inside the sample window in walltime mode", async () => {
+    process.env.CODSPEED_RUNNER_MODE = "walltime";
+    mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
+
+    let ts = 0n;
+    mockCore.InstrumentHooks.currentTimestamp.mockImplementation(() => ts++);
+
+    await withCodSpeed(
+      new Bench({ time: 0, iterations: 1, warmup: false }),
+    )
+      .add("RegExp", () => {
+        /o/.test("Hello World!");
+      })
+      .run();
+
+    const { startBenchmark, stopBenchmark, addMarker } =
+      mockCore.InstrumentHooks;
+
+    const startOrder = startBenchmark.mock.invocationCallOrder[0];
+    const stopOrder = stopBenchmark.mock.invocationCallOrder[0];
+    const markerOrders = addMarker.mock.invocationCallOrder;
+
+    // A BenchmarkStart/BenchmarkEnd pair must be emitted per benchmark...
+    expect(markerOrders).toHaveLength(2);
+    // ...and both must land between startBenchmark (SampleStart) and
+    // stopBenchmark (SampleEnd), otherwise the runner cannot bracket the
+    // perf samples and flame graph generation fails.
+    for (const order of markerOrders) {
+      expect(order).toBeGreaterThan(startOrder);
+      expect(order).toBeLessThan(stopOrder);
+    }
+  });
+
   it("should call setupCore and teardownCore only once", async () => {
     mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
     const bench = withCodSpeed(new Bench())

diff --git a/packages/vitest-plugin/benches/macos.bench.ts b/packages/vitest-plugin/benches/macos.bench.ts
@@ -0,0 +1,16 @@
+import { bench, describe } from "vitest";
+
+const isMacOS = process.platform === "darwin";
+
+function fibo(n: number): number {
+  if (n < 2) return 1;
+  return fibo(n - 1) + fibo(n - 2);
+}
+
+// macOS-only benchmark: skipped on every other platform, so it only runs on
+// the `codspeed-walltime-macos` CI job (see .github/workflows/codspeed.yml).
+describe.skipIf(!isMacOS)("macos only", () => {
+  bench("fibo darwin", () => {
+    fibo(30);
+  });
+});
diff --git a/packages/vitest-plugin/src/analysis.ts b/packages/vitest-plugin/src/analysis.ts
@@ -5,6 +5,7 @@ import {
   optimizeFunction,
   setupCore,
   teardownCore,
+  wrapWithRootFrame,
 } from "@codspeed/core";
 import { Benchmark, type RunnerTestSuite } from "vitest";
 import { NodeBenchmarkRunner } from "vitest/runners";
@@ -47,7 +48,7 @@ async function runAnalysisBench(
   await callSuiteHook(suite, benchmark, "beforeEach");
   await mongoMeasurement.start(uri);
   global.gc?.();
-  await (async function __codspeed_root_frame__() {
+  await wrapWithRootFrame(async () => {
     InstrumentHooks.startBenchmark();
     // @ts-expect-error we do not need to bind the function to an instance of tinybench's Bench
     await fn();
+16 −0		.github/workflows/ci.yml
+11,566 −11,193		dist/core.c
+37 −25		includes/callgrind.h
+5 −0		includes/core.h
+5 −0		scripts/stub.c
+24 −0		src/c.zig
+1 −0		src/helpers/valgrind.zig
+6 −0		src/helpers/valgrind_wrapper.c