From 252dc118af2c00959bb190ca5105b6a4d8215f93 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 8 May 2026 10:48:30 -0700 Subject: [PATCH] fix(planner): correct retry wall-clock formula + lock table contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #209. Coral platform-engineer review caught two items post-merge: 1. The wall-clock formula in taskMaxRetries' godoc was wrong. The executor increments t.RetryCount BEFORE calling retryBackoff (executor.go:189-196), so the smallest argument ever passed is 1, not 0. retryBackoff(1)=10s, retryBackoff(2)=20s, retryBackoff(N>=3) caps at 30s. Real wall-clock: 10s for N=1; 10s + 20s + 30s*(N-2) for N>=2. Operators sizing budgets from the prior comment would compute the wrong wall-clock — e.g. discoverPeersMaxRetries=20 was documented as ~9 min (5s + 10s + 20s + 30s*17 = 545s) but the real value is ~9.5 min (10s + 20s + 30s*18 = 570s). 2. The existing test in archive_test.go catches the regression (0 != 20) but doesn't pin the lookup table itself. A direct TestTaskMaxRetries makes a table swap fail loudly — e.g. swapping discoverPeersMaxRetries for genesisConfigureMaxRetries would compile fine but the table test would catch it (provided the two constants have different values, since the test compares by value). Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/planner/planner.go | 5 +++-- internal/planner/planner_test.go | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 internal/planner/planner_test.go diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 2a09aca..221b911 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -456,8 +456,9 @@ func buildPlannedTask(planID, taskType string, planIndex int, params any) (seiv1 } // taskMaxRetries is the executor's retry budget per task type. Default 0 -// makes the first ExecutionFailed terminal. Wall-clock per N retries: -// 5s + 10s + 20s + 30s*(N-3) for N>=3. +// makes the first ExecutionFailed terminal. 
Wall-clock per N retries +// (RetryCount is incremented before retryBackoff is called): +// 10s for N=1; 10s + 20s + 30s*(N-2) for N>=2. func taskMaxRetries(taskType string) int { switch taskType { case TaskConfigureGenesis: diff --git a/internal/planner/planner_test.go b/internal/planner/planner_test.go new file mode 100644 index 0000000..d23e1d6 --- /dev/null +++ b/internal/planner/planner_test.go @@ -0,0 +1,18 @@ +package planner + +import "testing" + +func TestTaskMaxRetries(t *testing.T) { + cases := map[string]int{ + TaskConfigureGenesis: genesisConfigureMaxRetries, + TaskAssembleGenesis: groupAssemblyMaxRetries, + TaskDiscoverPeers: discoverPeersMaxRetries, + "unknown-task-type": 0, + "": 0, + } + for taskType, want := range cases { + if got := taskMaxRetries(taskType); got != want { + t.Errorf("taskMaxRetries(%q) = %d, want %d", taskType, got, want) + } + } +}