From b53286a473ef10ace71a62eeb2410cc3f1793908 Mon Sep 17 00:00:00 2001
From: Julian Meyer
Date: Mon, 1 Jun 2026 10:31:00 -0700
Subject: [PATCH 1/3] fix(simulator): OpcodeStats Add/Sub must use union of keys

Before this change, both OpcodeStats.Add and OpcodeStats.Sub iterated
only the 'other' argument:

    for opcode, count := range other {
        result[opcode] = o[opcode] + count
    }

This silently dropped any key present in the receiver 'o' but absent
from 'other'. For Add(empty) the result was an empty map, losing 'o'
entirely.

Concrete consequence in sendTxs: per-tx blockCounts is computed as

    blockCounts = expected.Sub(actual).Round()

where 'expected' is base x Mul(numCalls+1 x scaleFactor) (carrying all
base opcodes + precompiles) and 'actual' starts empty for the first tx.
With the buggy Sub iterating actual=empty, blockCounts.Opcodes and
blockCounts.Precompiles were ALWAYS empty for every tx in every block.

The downstream effect: workloads with precompile counts in their config
(e.g. base-mainnet-simulation: bls12381MapG2, ecrecover, bls12381G1Add,
bls12381G1MultiExp) had per-tx execution that skipped all precompile
work. Each tx therefore consumed far less gas than the gas estimate
predicted, and blocks under-filled the gas budget.

Example: base-mainnet-simulation @ GasLimit=250M (block_time=1s)
reported gas/per_block ~51M (20% of limit) in production runs because
per-tx execution skipped the heaviest workload component.

Fix: both Add and Sub now iterate the union of keys. For Sub, a key
present only in 'other' produces a negative result entry (matches
arithmetic semantics).

Includes unit tests for the union semantics and a regression test for
the first-tx scenario in sendTxs.
--- .../payload/simulator/simulatorstats/types.go | 14 +++- .../simulator/simulatorstats/types_test.go | 76 +++++++++++++++++++ 2 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 runner/payload/simulator/simulatorstats/types_test.go diff --git a/runner/payload/simulator/simulatorstats/types.go b/runner/payload/simulator/simulatorstats/types.go index 0285b908..b65b7198 100644 --- a/runner/payload/simulator/simulatorstats/types.go +++ b/runner/payload/simulator/simulatorstats/types.go @@ -27,9 +27,12 @@ func (o OpcodeStats) Round() OpcodeStats { } func (o OpcodeStats) Add(other OpcodeStats) OpcodeStats { - result := make(OpcodeStats) + result := make(OpcodeStats, len(o)+len(other)) + for opcode, count := range o { + result[opcode] = count + } for opcode, count := range other { - result[opcode] = o[opcode] + count + result[opcode] += count } return result } @@ -43,9 +46,12 @@ func (o OpcodeStats) Pow(n float64) OpcodeStats { } func (o OpcodeStats) Sub(other OpcodeStats) OpcodeStats { - result := make(OpcodeStats) + result := make(OpcodeStats, len(o)+len(other)) + for opcode, count := range o { + result[opcode] = count + } for opcode, count := range other { - result[opcode] = o[opcode] - count + result[opcode] -= count } return result } diff --git a/runner/payload/simulator/simulatorstats/types_test.go b/runner/payload/simulator/simulatorstats/types_test.go new file mode 100644 index 00000000..0d2b0ea1 --- /dev/null +++ b/runner/payload/simulator/simulatorstats/types_test.go @@ -0,0 +1,76 @@ +package simulatorstats + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOpcodeStatsAdd_UnionOfKeys(t *testing.T) { + a := OpcodeStats{"A": 1, "B": 2} + b := OpcodeStats{"B": 10, "C": 100} + + got := a.Add(b) + + require.Equal(t, 1.0, got["A"], "key only in receiver must be preserved") + require.Equal(t, 12.0, got["B"], "shared key must sum") + require.Equal(t, 100.0, got["C"], "key only in arg must be preserved") + require.Len(t, 
got, 3) +} + +func TestOpcodeStatsAdd_EmptyOther(t *testing.T) { + a := OpcodeStats{"A": 1, "B": 2} + got := a.Add(OpcodeStats{}) + require.Equal(t, 1.0, got["A"]) + require.Equal(t, 2.0, got["B"]) + require.Len(t, got, 2) +} + +func TestOpcodeStatsAdd_EmptyReceiver(t *testing.T) { + got := OpcodeStats{}.Add(OpcodeStats{"A": 1, "B": 2}) + require.Equal(t, 1.0, got["A"]) + require.Equal(t, 2.0, got["B"]) + require.Len(t, got, 2) +} + +func TestOpcodeStatsSub_UnionOfKeys(t *testing.T) { + a := OpcodeStats{"A": 10, "B": 20} + b := OpcodeStats{"B": 5, "C": 100} + + got := a.Sub(b) + + require.Equal(t, 10.0, got["A"], "key only in receiver must be preserved") + require.Equal(t, 15.0, got["B"], "shared key must subtract") + require.Equal(t, -100.0, got["C"], "key only in arg must be included (negated)") + require.Len(t, got, 3) +} + +func TestOpcodeStatsSub_EmptyOther(t *testing.T) { + a := OpcodeStats{"A": 10, "B": 20} + got := a.Sub(OpcodeStats{}) + require.Equal(t, 10.0, got["A"]) + require.Equal(t, 20.0, got["B"]) + require.Len(t, got, 2) +} + +func TestStatsSubAdd_FirstTxBlockCountsIncludePrecompiles(t *testing.T) { + base := &Stats{ + Precompiles: OpcodeStats{"ecrecover": 0.5, "bls12381MapG2": 1.0}, + Opcodes: OpcodeStats{"KECCAK256": 10.0}, + } + + expected := base.Mul(1.0) + actual := NewStats() + + blockCounts := expected.Sub(actual).Round() + + require.Equal(t, 1.0, blockCounts.Precompiles["ecrecover"], + "precompiles missing in blockCounts means worker txs skip precompile execution") + require.Equal(t, 1.0, blockCounts.Precompiles["bls12381MapG2"]) + require.Equal(t, 10.0, blockCounts.Opcodes["KECCAK256"]) + + actual = actual.Add(blockCounts) + require.Equal(t, 1.0, actual.Precompiles["ecrecover"], + "accumulated actual must remember the keys we added") + require.Equal(t, 1.0, actual.Precompiles["bls12381MapG2"]) +} From fb18a3719e2a90d049fa0d9ae72e00ef79286d17 Mon Sep 17 00:00:00 2001 From: Julian Meyer Date: Mon, 1 Jun 2026 10:11:12 -0700 Subject: [PATCH 
2/3] fix(simulator): bump pre-init buffer 1.05x -> 2.0x to fix CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The basic-benchmarks job in public-benchmarks.yaml has been failing on
every push to main since cf47296 (PR #184, May 15) with:

    execution reverted: Not enough accounts to load/update

Reproduces consistently for base-mainnet-simulation @ 25M gas (the first
cell of the matrix, run on both reth and geth). Fails roughly 7-8
benchmark blocks into a 20-block run.

Root mechanism: PR #184 changed targetCalls from

    ceil(numCallsPerBlock * scaleFactor)   // pre-fix

to

    numCallsPerBlock                       // post-fix

to address an overshoot for scaleFactor > 1 (e.g. 200M gas case). For
scaleFactor < 1 (small gas limits like 25M), this raised on-chain
consumption within the existing 5% pre-init buffer. CI's
base-mainnet-simulation @ 25M sits right at the boundary where rounding
accumulation + per-field interaction overruns the 5% margin and trips
the contract's

    current_address_index + load + update <= num_address_initialized

require well before block 20.

Bumping the multiplier to 2.0x removes the boundary entirely. Pre-init
runs during setup (one-time cost, dominated by mineAndConfirm batching),
so this adds maybe a few seconds of setup wall time per test cell —
negligible vs the ~5 minutes per cell + 1h total job time.

Out of scope for this PR but noted for follow-up:

- OpcodeStats.Sub/Add iterate 'other' rather than the union of the
  receiver's keys and other's keys, which causes per-tx
  blockCounts.Precompiles to always be empty in sendTxs. Result: per-tx
  actual gas is much lower than gas_per_call estimate, so the benchmark
  under-utilizes the gas budget. Not the cause of THIS CI failure, but
  should be fixed.

- The contract require is a benchmark-fidelity check, not a protocol
  constraint; making the worker recalibrate numCallsPerBlock from
  observed on-chain consumption would be a more principled fix than
  relying on a static safety multiplier.
--- runner/payload/simulator/worker.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/runner/payload/simulator/worker.go b/runner/payload/simulator/worker.go index 1a228c7a..edcaab17 100644 --- a/runner/payload/simulator/worker.go +++ b/runner/payload/simulator/worker.go @@ -389,7 +389,11 @@ func (t *simulatorPayloadWorker) testForBlocks(ctx context.Context, simulator *a t.log.Info("Calculated num calls per block", "numCalls", t.numCallsPerBlock, "gas", gas, "gasLimit", t.params.GasLimit, "buffer", buffer) - configForAllBlocks, err := t.payloadParams.Mul(float64(t.numCallsPerBlock) * float64(t.params.NumBlocks) * t.scaleFactor * 1.05).ToConfig() + // 2.0x safety multiplier (was 1.05). The 5% buffer was not enough to cover + // real on-chain consumption for base-mainnet-simulation @ 25M after PR #184, + // causing CI to revert with "Not enough accounts to load/update" mid-run. + // Pre-init is cheap relative to test runtime; err on the side of generous. + configForAllBlocks, err := t.payloadParams.Mul(float64(t.numCallsPerBlock) * float64(t.params.NumBlocks) * t.scaleFactor * 2.0).ToConfig() if err != nil { return errors.Wrap(err, "failed to convert payload params to config") } From 87bd28e439813bbb083ec9bc535b6d64bf48ab73 Mon Sep 17 00:00:00 2001 From: Julian Meyer Date: Mon, 1 Jun 2026 10:38:04 -0700 Subject: [PATCH 3/3] feat(simulator): recalibrate numCallsPerBlock from observed block gas Adds optional payloadworker.BlockObserver interface. The sequencer calls OnBlockBuilt after every non-setup benchmark block with the observed gas used and user tx count. simulatorPayloadWorker implements it: after the first block, it recomputes numCallsPerBlock from actual per-tx gas (rather than the setup-time estimate from simulator.Run). Why: the setup gas estimate runs simulator.Run with the full BASE config (load_accounts=13, update_accounts=5, storage_loaded=49, ..., precompile calls). 
When per-tx blockCounts is scaleFactor*base, actual per-tx gas differs from the estimate by a factor scaleFactor (modulo rounding), so numCallsPerBlock=(gasLimit-buffer)/base_gas under-estimates per-block capacity by a factor of scaleFactor for scaleFactor<1. Concrete impact for base-mainnet-simulation @ 25M (scaleFactor=0.714): - estimate-based: 46 calls/block, 46x365k=16.8M of 25M (67% fill) - observed-based: 65 calls/block, 65x365k=23.7M of 25M (95% fill) For scaleFactor>1 with user-specified calls_per_block cap, recalibration also helps: gas budget breaks the send loop early at ~68 txs even though targetCalls=100, so 32 txs/block of wasted gas estimation. Recalibration drops targetCalls to the actually-achievable 68. Recalibration is one-shot (the recalibrated bool guards against subsequent invocations) and respects any user-specified CallsPerBlock upper bound. Includes unit tests for under-fill, over-target, user-cap, no-op-after-first-block, and degenerate-input cases. --- runner/network/sequencer_benchmark.go | 6 +++ runner/payload/simulator/worker.go | 30 +++++++++++ runner/payload/simulator/worker_test.go | 70 +++++++++++++++++++++++++ runner/payload/worker/types.go | 8 +++ 4 files changed, 114 insertions(+) diff --git a/runner/network/sequencer_benchmark.go b/runner/network/sequencer_benchmark.go index 5562df2c..55746014 100644 --- a/runner/network/sequencer_benchmark.go +++ b/runner/network/sequencer_benchmark.go @@ -379,6 +379,12 @@ func (nb *sequencerBenchmark) proposeBlock( updatedPendingTxs = 0 } + if collectMetrics { + if observer, ok := transactionWorker.(payloadworker.BlockObserver); ok { + observer.OnBlockBuilt(payload.GasUsed, userTxsIncluded) + } + } + if !nb.config.Params.UseBaseConsensusTiming() { log.Info("Sleeping for block time", "block_time", nb.config.Params.BlockTime) time.Sleep(nb.config.Params.BlockTime) diff --git a/runner/payload/simulator/worker.go b/runner/payload/simulator/worker.go index edcaab17..db968ff7 100644 --- 
a/runner/payload/simulator/worker.go +++ b/runner/payload/simulator/worker.go @@ -72,6 +72,7 @@ type simulatorPayloadWorker struct { setupTransactor *bind.TransactOpts numCallsPerBlock uint64 + recalibrated bool numCallers int } @@ -697,3 +698,32 @@ func (t *simulatorPayloadWorker) SendTxs(ctx context.Context, pendingTxs int) (i } return n, nil } + +func (t *simulatorPayloadWorker) OnBlockBuilt(gasUsed uint64, userTxsIncluded int) { + if t.recalibrated || gasUsed == 0 || userTxsIncluded <= 0 { + return + } + t.recalibrated = true + + actualGasPerCall := float64(gasUsed) / float64(userTxsIncluded) + if actualGasPerCall <= 0 { + return + } + + targetCalls := uint64(math.Floor((float64(t.params.GasLimit) - buffer) / actualGasPerCall)) + if t.payloadParams.CallsPerBlock != "fill" { + if userMax, err := strconv.ParseUint(t.payloadParams.CallsPerBlock, 10, 64); err == nil && userMax < targetCalls { + targetCalls = userMax + } + } + + if targetCalls > 0 && targetCalls != t.numCallsPerBlock { + t.log.Info("Recalibrated numCallsPerBlock from observed block gas", + "old", t.numCallsPerBlock, + "new", targetCalls, + "observed_gas_per_call", uint64(actualGasPerCall), + "observed_block_gas", gasUsed, + "txs_in_block", userTxsIncluded) + t.numCallsPerBlock = targetCalls + } +} diff --git a/runner/payload/simulator/worker_test.go b/runner/payload/simulator/worker_test.go index 3f789e05..fa87a1a9 100644 --- a/runner/payload/simulator/worker_test.go +++ b/runner/payload/simulator/worker_test.go @@ -6,7 +6,10 @@ import ( "math/big" "testing" + benchtypes "github.com/base/base-bench/runner/network/types" + "github.com/base/base-bench/runner/payload/simulator/simulatorstats" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" "github.com/stretchr/testify/require" ) @@ -98,4 +101,71 @@ func TestMineAndConfirmNoBatchingWouldTimeout(t *testing.T) { var _ interface { Setup(ctx context.Context) error SendTxs(ctx context.Context, pendingTxs int) (int, error) 
+ OnBlockBuilt(gasUsed uint64, userTxsIncluded int) } = (*simulatorPayloadWorker)(nil) + +func newRecalibrationWorker(t *testing.T, gasLimit uint64, numCallsPerBlock uint64, callsPerBlock string) *simulatorPayloadWorker { + t.Helper() + return &simulatorPayloadWorker{ + log: log.New(), + params: benchtypes.RunParams{GasLimit: gasLimit}, + numCallsPerBlock: numCallsPerBlock, + payloadParams: &simulatorstats.Stats{CallsPerBlock: callsPerBlock}, + } +} + +func TestOnBlockBuilt_RaisesNumCallsWhenUnderfilled(t *testing.T) { + w := newRecalibrationWorker(t, 25_000_000, 46, "fill") + w.OnBlockBuilt(16_800_000, 46) // observed: 365k gas/tx + + require.True(t, w.recalibrated) + // (25M - 1M) / 365k = 65 + require.Equal(t, uint64(65), w.numCallsPerBlock) +} + +func TestOnBlockBuilt_RespectsUserSpecifiedCap(t *testing.T) { + w := newRecalibrationWorker(t, 25_000_000, 46, "50") + w.OnBlockBuilt(16_800_000, 46) // raw recalibration would be 65, capped to 50 + + require.True(t, w.recalibrated) + require.Equal(t, uint64(50), w.numCallsPerBlock) +} + +func TestOnBlockBuilt_LowersNumCallsWhenOvertargeting(t *testing.T) { + w := newRecalibrationWorker(t, 250_000_000, 100, "100") + w.OnBlockBuilt(248_000_000, 68) // observed: 3.65M gas/tx + + require.True(t, w.recalibrated) + // (250M - 1M) / 3.65M = 68, capped at user-specified 100, so 68. 
+ require.Equal(t, uint64(68), w.numCallsPerBlock) +} + +func TestOnBlockBuilt_NoopOnSubsequentBlocks(t *testing.T) { + w := newRecalibrationWorker(t, 25_000_000, 46, "fill") + + w.OnBlockBuilt(16_800_000, 46) + firstRecalibration := w.numCallsPerBlock + require.Equal(t, uint64(65), firstRecalibration) + + w.OnBlockBuilt(1_000_000, 1) // would suggest ~24 — must NOT apply + require.Equal(t, firstRecalibration, w.numCallsPerBlock) +} + +func TestOnBlockBuilt_GuardsAgainstZeroInputs(t *testing.T) { + for _, tc := range []struct { + name string + gasUsed uint64 + userTxsIncluded int + }{ + {"zero gas", 0, 46}, + {"zero txs", 16_800_000, 0}, + {"negative txs", 16_800_000, -1}, + } { + t.Run(tc.name, func(t *testing.T) { + w := newRecalibrationWorker(t, 25_000_000, 46, "fill") + w.OnBlockBuilt(tc.gasUsed, tc.userTxsIncluded) + require.False(t, w.recalibrated, "must not consume the one-shot recalibration on degenerate input") + require.Equal(t, uint64(46), w.numCallsPerBlock) + }) + } +} diff --git a/runner/payload/worker/types.go b/runner/payload/worker/types.go index 942df001..6455929a 100644 --- a/runner/payload/worker/types.go +++ b/runner/payload/worker/types.go @@ -32,3 +32,11 @@ type CompletionWorker interface { Done() <-chan struct{} Err() error } + +// BlockObserver lets a worker observe each non-setup block's on-chain outcome. +// The sequencer calls OnBlockBuilt after every benchmark block. Workers use it +// to refine per-tx assumptions (e.g. recalibrate numCallsPerBlock from +// observed gas-per-call when the setup-time gas estimate was inaccurate). +type BlockObserver interface { + OnBlockBuilt(gasUsed uint64, userTxsIncluded int) +}