diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 15cb1a4ff..e465ac1c7 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -5,6 +5,9 @@ permissions: {} push: branches: - main + pull_request: + branches: + - main workflow_dispatch: jobs: @@ -12,9 +15,6 @@ jobs: name: EVM Contract Benchmark runs-on: ubuntu-latest timeout-minutes: 30 - permissions: - contents: write - issues: write steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Go @@ -29,30 +29,112 @@ jobs: run: | cd test/e2e && go test -tags evm -bench=. -benchmem -run='^$' \ -timeout=10m --evm-binary=../../build/evm | tee output.txt - - name: Store benchmark result + - name: Run Block Executor benchmarks + run: | + go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \ + ./block/internal/executing/... > block_executor_output.txt + - name: Upload benchmark results + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: evm-benchmark-results + path: | + test/e2e/output.txt + block_executor_output.txt + + spamoor-benchmark: + name: Spamoor Trace Benchmark + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version-file: ./go.mod + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 + - name: Build binaries + run: make build-evm build-da + - name: Run Spamoor smoke test + run: | + cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \ + -run='^TestSpamoorSmoke$' -v -timeout=15m --evm-binary=../../build/evm + - name: Upload benchmark results + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: spamoor-benchmark-results + path: 
test/e2e/spamoor_bench.json + + # single job to push all results to gh-pages sequentially, avoiding race conditions + publish-benchmarks: + name: Publish Benchmark Results + needs: [evm-benchmark, spamoor-benchmark] + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + pull-requests: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Download EVM benchmark results + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: evm-benchmark-results + - name: Download Spamoor benchmark results + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: spamoor-benchmark-results + path: test/e2e/ + + # only update the benchmark baseline on push/dispatch, not on PRs + - name: Store EVM Contract Roundtrip result + if: always() uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 with: name: EVM Contract Roundtrip tool: 'go' output-file-path: test/e2e/output.txt - auto-push: true + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} github-token: ${{ secrets.GITHUB_TOKEN }} alert-threshold: '150%' fail-on-alert: true comment-on-alert: true - - name: Run Block Executor benchmarks - run: | - go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \ - ./block/internal/executing/... 
> block_executor_output.txt - - name: Store Block Executor benchmark result + # delete local gh-pages so the next benchmark action step fetches fresh from remote + - name: Reset local gh-pages branch + if: always() + run: git branch -D gh-pages || true + + - name: Store Block Executor result + if: always() uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 with: name: Block Executor Benchmark tool: 'go' output-file-path: block_executor_output.txt - auto-push: true + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} github-token: ${{ secrets.GITHUB_TOKEN }} alert-threshold: '150%' fail-on-alert: true comment-on-alert: true + + # delete local gh-pages so the next benchmark action step fetches fresh from remote + - name: Reset local gh-pages branch + if: always() + run: git branch -D gh-pages || true + + - name: Store Spamoor Trace result + if: always() + uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 + with: + name: Spamoor Trace Benchmarks + tool: 'customSmallerIsBetter' + output-file-path: test/e2e/spamoor_bench.json + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} + github-token: ${{ secrets.GITHUB_TOKEN }} + alert-threshold: '150%' + fail-on-alert: false + comment-on-alert: true diff --git a/test/e2e/evm_spamoor_smoke_test.go b/test/e2e/evm_spamoor_smoke_test.go index ca172948a..d86f34307 100644 --- a/test/e2e/evm_spamoor_smoke_test.go +++ b/test/e2e/evm_spamoor_smoke_test.go @@ -6,6 +6,7 @@ import ( "context" "fmt" "net/http" + "os" "path/filepath" "testing" "time" @@ -164,6 +165,11 @@ func TestSpamoorSmoke(t *testing.T) { evRethSpans := extractSpansFromTraces(evRethTraces) printTraceReport(t, "ev-reth", toTraceSpans(evRethSpans)) + // write benchmark JSON for ev-node spans when output path is configured + if outputPath := 
os.Getenv("BENCH_JSON_OUTPUT"); outputPath != "" { + writeTraceBenchmarkJSON(t, "SpamoorSmoke", toTraceSpans(evNodeSpans), outputPath) + } + // assert expected ev-node span names are present. // these spans reliably appear during block production with transactions flowing. expectedSpans := []string{ diff --git a/test/e2e/evm_test_common.go b/test/e2e/evm_test_common.go index 5ddaf935b..3ea537a35 100644 --- a/test/e2e/evm_test_common.go +++ b/test/e2e/evm_test_common.go @@ -16,6 +16,7 @@ package e2e import ( "context" + "encoding/json" "flag" "fmt" "math/big" @@ -855,21 +856,17 @@ type traceSpan interface { SpanDuration() time.Duration } -// printTraceReport aggregates spans by operation name and prints a timing breakdown. -func printTraceReport(t testing.TB, label string, spans []traceSpan) { - t.Helper() - if len(spans) == 0 { - t.Logf("WARNING: no spans found for %s", label) - return - } +// spanStats holds aggregated timing statistics for a single span operation. +type spanStats struct { + count int + total time.Duration + min time.Duration + max time.Duration +} - type stats struct { - count int - total time.Duration - min time.Duration - max time.Duration - } - m := make(map[string]*stats) +// aggregateSpanStats groups spans by operation name and computes count, total, min, max. +func aggregateSpanStats(spans []traceSpan) map[string]*spanStats { + m := make(map[string]*spanStats) for _, span := range spans { d := span.SpanDuration() if d <= 0 { @@ -878,7 +875,7 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) { name := span.SpanName() s, ok := m[name] if !ok { - s = &stats{min: d, max: d} + s = &spanStats{min: d, max: d} m[name] = s } s.count++ @@ -890,6 +887,18 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) { s.max = d } } + return m +} + +// printTraceReport aggregates spans by operation name and prints a timing breakdown. 
+func printTraceReport(t testing.TB, label string, spans []traceSpan) {
+	t.Helper()
+	if len(spans) == 0 {
+		t.Logf("WARNING: no spans found for %s", label)
+		return
+	}
+
+	m := aggregateSpanStats(spans)
 
 	names := make([]string, 0, len(m))
 	for name := range m {
@@ -924,3 +933,62 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
 		t.Logf("%-40s %5.1f%% %s", name, pct, bar)
 	}
 }
+
+// benchmarkEntry is one record of the JSON array written for
+// github-action-benchmark's customSmallerIsBetter tool: a display name,
+// a unit label, and a numeric value.
+type benchmarkEntry struct {
+	Name  string  `json:"name"`
+	Unit  string  `json:"unit"`
+	Value float64 `json:"value"`
+}
+
+// writeTraceBenchmarkJSON aggregates spans by operation name and writes one
+// customSmallerIsBetter entry per operation to outputPath, holding the average
+// span duration in microseconds. Entry names have the form
+// "<label> - <operation> (avg)" and are emitted in sorted order so that the
+// output is stable across runs.
+//
+// The function is a no-op when outputPath is empty, and only logs a warning
+// (writing nothing) when aggregation yields no stats. A marshal or write
+// failure aborts the test via t.Fatalf.
+func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
+	t.Helper()
+	if outputPath == "" {
+		return
+	}
+	m := aggregateSpanStats(spans)
+	if len(m) == 0 {
+		t.Logf("WARNING: no span stats to write for %s", label)
+		return
+	}
+
+	// sort by name for stable output
+	names := make([]string, 0, len(m))
+	for name := range m {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	entries := make([]benchmarkEntry, 0, len(names))
+	for _, name := range names {
+		s := m[name]
+		// divide in floating point: Duration.Microseconds() would truncate the
+		// total to whole microseconds before averaging, losing sub-us precision
+		avg := float64(s.total) / float64(time.Microsecond) / float64(s.count)
+		entries = append(entries, benchmarkEntry{
+			Name:  fmt.Sprintf("%s - %s (avg)", label, name),
+			Unit:  "us",
+			Value: avg,
+		})
+	}
+
+	data, err := json.MarshalIndent(entries, "", " ")
+	if err != nil {
+		t.Fatalf("failed to marshal benchmark JSON: %v", err)
+	}
+	if err := os.WriteFile(outputPath, data, 0644); err != nil {
+		t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
+	}
+	t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
+}