From 4d7901b8137d6b7caa529f936cdb7b41a83a0ecd Mon Sep 17 00:00:00 2001 From: crowlbot <280062030+crowlbot@users.noreply.github.com> Date: Wed, 13 May 2026 14:29:38 +0000 Subject: [PATCH 1/4] test: poll until volume is visible before mounting it `sandbox volumes create` returns the volume id immediately, but the backend may take a moment to make the volume visible to subsequent operations (mount, list). The `sandbox with volume mount` test hit this race on every CI run, surfacing as `VOLUME_NOT_FOUND` when the follow-up `sandbox create --volume <volumeId>:path` call ran before the volume was queryable. Adds a `waitForVolumeReady` helper that polls `volumes list` for up to 15s (500ms interval) after creation, and inserts a call between the volume creation and the mount in the affected test. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/sandbox.test.ts | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/sandbox.test.ts b/tests/sandbox.test.ts index 7cb12a2..bb92d2a 100644 --- a/tests/sandbox.test.ts +++ b/tests/sandbox.test.ts @@ -11,6 +11,28 @@ const sandbox = async (...args: string[]) => { return (await $.raw`deno sandbox ${args.join(" ")}`.text()).trim(); }; +/** + * `sandbox volumes create` returns the volume id immediately, but the + * backend may take a moment to make the volume visible to subsequent + * operations (mount, list). Poll `volumes list` until the volume appears + * to avoid a flaky `VOLUME_NOT_FOUND` race when the next step tries to + * mount it. 
+ */ +async function waitForVolumeReady( + volumeId: string, + { timeoutMs = 15_000, intervalMs = 500 } = {}, +): Promise<void> { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const list = await sandbox("volumes", "list"); + if (list.includes(volumeId)) return; + await new Promise((r) => setTimeout(r, intervalMs)); + } + throw new Error( + `Timed out waiting for volume ${volumeId} to become visible via 'volumes list'`, + ); +} + Deno.test("sandbox create", async () => { const sandboxId = await sandbox( "create", @@ -143,6 +165,7 @@ Deno.test("sandbox with volume mount", async () => { "--region", "ord", ); + await waitForVolumeReady(volumeId); const sandboxId = await sandbox( "create", From 7dbd5ed50241591c31ccd64a050f543df43506b0 Mon Sep 17 00:00:00 2001 From: crowlbot <280062030+crowlbot@users.noreply.github.com> Date: Wed, 13 May 2026 15:23:15 +0000 Subject: [PATCH 2/4] test: extend volume-ready wait with a post-list sleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial fix polled `volumes list` and stopped when the new volume appeared, but the mount still raced — the cluster takes another beat after the volume is queryable before it's actually mountable. Add a configurable `postListSleepMs` (default 5s) after the list confirms, and bump the overall timeout to 30s. Each sandbox-with-volume run now spends ~5-15s waiting, which is well below the 60s sandbox timeout. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/sandbox.test.ts | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/sandbox.test.ts b/tests/sandbox.test.ts index bb92d2a..362abb4 100644 --- a/tests/sandbox.test.ts +++ b/tests/sandbox.test.ts @@ -13,19 +13,27 @@ const sandbox = async (...args: string[]) => { /** * `sandbox volumes create` returns the volume id immediately, but the - * backend may take a moment to make the volume visible to subsequent - * operations (mount, list). 
Poll `volumes list` until the volume appears - * to avoid a flaky `VOLUME_NOT_FOUND` race when the next step tries to - * mount it. + * backend takes a moment to make the volume mountable inside a sandbox + * even after it's queryable via `volumes list`. Poll the list endpoint + * first, then sleep `postListSleepMs` to let the cluster sync — without + * that extra sleep we still hit `VOLUME_NOT_FOUND` when the follow-up + * `sandbox create --volume <volumeId>:path` runs. */ async function waitForVolumeReady( volumeId: string, - { timeoutMs = 15_000, intervalMs = 500 } = {}, + { + timeoutMs = 30_000, + intervalMs = 500, + postListSleepMs = 5_000, + } = {}, ): Promise<void> { const deadline = Date.now() + timeoutMs; while (Date.now() < deadline) { const list = await sandbox("volumes", "list"); - if (list.includes(volumeId)) return; + if (list.includes(volumeId)) { + await new Promise((r) => setTimeout(r, postListSleepMs)); + return; + } await new Promise((r) => setTimeout(r, intervalMs)); } throw new Error( From 87284a0eeb5564b984c70c3e151f1b67dff289d6 Mon Sep 17 00:00:00 2001 From: crowlbot <280062030+crowlbot@users.noreply.github.com> Date: Wed, 13 May 2026 16:22:21 +0000 Subject: [PATCH 3/4] test: retry the sandbox create --volume call on race Even with the post-list sleep extension (1 commit back), the sandbox-side volume lookup propagates separately from the deployng list endpoint. Retrying the mount-bearing `sandbox create` call up to 6 times (5s apart, ~30s budget) handles the residual race without papering over genuine backend failures. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/sandbox.test.ts | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/sandbox.test.ts b/tests/sandbox.test.ts index 362abb4..4230276 100644 --- a/tests/sandbox.test.ts +++ b/tests/sandbox.test.ts @@ -175,14 +175,35 @@ Deno.test("sandbox with volume mount", async () => { ); await waitForVolumeReady(volumeId); - const sandboxId = await sandbox( - "create", - "--quiet", - "--timeout", - "60s", - "--volume", - `${volumeId}:/data/dataset`, - ); + // The sandbox-side volume lookup hits a different service from the + // volumes list endpoint we just polled; that service propagates the + // new volume asynchronously, so a one-shot wait isn't enough. Retry + // the mount-bearing create a few times. + let sandboxId: string | undefined; + let lastErr: unknown; + for (let attempt = 0; attempt < 6; attempt++) { + try { + sandboxId = await sandbox( + "create", + "--quiet", + "--timeout", + "60s", + "--volume", + `${volumeId}:/data/dataset`, + ); + break; + } catch (err) { + lastErr = err; + await new Promise((r) => setTimeout(r, 5_000)); + } + } + if (!sandboxId) { + throw new Error( + `sandbox create with --volume kept failing: ${ + lastErr instanceof Error ? lastErr.message : String(lastErr) + }`, + ); + } await sandbox( "exec", From 46ee8514f6f512f1b8e18d8dc3ac12b32dc1a28c Mon Sep 17 00:00:00 2001 From: crowlbot <280062030+crowlbot@users.noreply.github.com> Date: Wed, 13 May 2026 17:22:05 +0000 Subject: [PATCH 4/4] test: pin sandbox to the volume's region Even with retries and the post-list sleep, the sandbox-side volume lookup was returning 404 deterministically (6/6 attempts, ~30s apart). The volume is created in `ord` but the sandbox create call didn't specify a region, so it landed in a different cluster that doesn't know about the volume. Pin the sandbox create to `--region ord` to match the volume's region. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/sandbox.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/sandbox.test.ts b/tests/sandbox.test.ts index 4230276..853aa95 100644 --- a/tests/sandbox.test.ts +++ b/tests/sandbox.test.ts @@ -188,6 +188,8 @@ Deno.test("sandbox with volume mount", async () => { "--quiet", "--timeout", "60s", + "--region", + "ord", "--volume", `${volumeId}:/data/dataset`, );