From 9a41b2df09269ee42aeaad67c31e73baa8f45aa0 Mon Sep 17 00:00:00 2001 From: Helge Tesdal Date: Tue, 21 Apr 2026 13:36:45 +0200 Subject: [PATCH 1/5] test(phase-c): scaffold subagent-hang regression tests (skipped) Two .skip'd tests that will be filled in next commits: - SSE stall triggers retry, not indefinite hang (A.1/A.2 regression gate) - subagent question in headless run does not deadlock (Phase B regression gate) Scaffold compiles + typechecks green so the commit itself is clean; bodies will land in follow-up commits that also flip the .skip off. Per plan docs/superpowers/plans/2026-04-18-subagent-hang-hardening.md:1217-1244. --- .../session/subagent-hang-regression.test.ts | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 packages/opencode/test/session/subagent-hang-regression.test.ts diff --git a/packages/opencode/test/session/subagent-hang-regression.test.ts b/packages/opencode/test/session/subagent-hang-regression.test.ts new file mode 100644 index 000000000000..c17f96b431ff --- /dev/null +++ b/packages/opencode/test/session/subagent-hang-regression.test.ts @@ -0,0 +1,34 @@ +import { describe } from "bun:test" +import { Effect, Layer } from "effect" +import { testEffect } from "../lib/effect" + +// Phase C regression gate — any future change that reintroduces either failure +// mode (SSE stall = indefinite hang; subagent question in headless run = +// deadlock) will fail these tests. +// +// Step 1: scaffold with `.skip`. Step 2 (next commit) fleshes out bodies using +// the test LLM server's `hang` helper and the Phase B auto-reject wiring. + +const it = testEffect(Layer.empty) + +describe("subagent hang regression", () => { + it.live.skip("SSE stall triggers retry, not indefinite hang", () => + Effect.gen(function* () { + // TODO(phase-c-2a): drive a stalling provider through SessionPrompt.loop. 
+ // - provideTmpdirServer with llm.hang queued as first reply + // - configure chunkTimeout short enough to trip within test budget + // - assert session reaches session.error within 2x chunkTimeout + // - assert SessionStatus.set was called with attempt >= 1 + }), + ) + + it.live.skip("subagent question in headless run does not deadlock", () => + Effect.gen(function* () { + // TODO(phase-c-2b): drive a provider that steers agent -> task -> subagent + // -> question in a single turn. + // - assert status.idle reached within N seconds (not indefinite) + // - assert tool-output contains the auto-rejection message from the + // Phase B handler (RunEvents auto-rejects descendant questions) + }), + ) +}) From 5764aa195355fa5d556e49aeb7ffd1b7f58ff615 Mon Sep 17 00:00:00 2001 From: Helge Tesdal Date: Tue, 21 Apr 2026 17:10:01 +0200 Subject: [PATCH 2/5] test(phase-c): flesh out subagent-hang regression tests Test A: SSE stall triggers retry, not indefinite hang. Queues a hang reply into the test LLM server; configures chunkTimeout=1000ms; asserts SessionStatus transitions to { type: 'retry', attempt >= 1, message ~/SSE|timed out/i } within 8s. Gates against regression of Phase A wrapSSE + retry classification. Test B: subagent question in headless run does not deadlock. Queues a root 'task' call and a subagent 'question' call. Mounts an in-test bus subscriber that mirrors the RunEvents contract (reject descendant questions/permissions). Asserts the root loop completes within 10s and settles idle. Gates against regression of Phase B handler contract + Question/Permission reject path. Implementation notes surfaced by the subagent refiner: - SSEStallError is classified transport-retryable by SessionRetry.retryable, so session.error only fires after 5 retries (60s). Test observes the retry status transition instead of session.error for fast regression. 
- The 'general' subagent inherits question: 'deny' from agent defaults, so Test B overrides it via config.agent.general.permission.question. - Effect 4 beta: Fiber.poll removed; using fiber.pollUnsafe() on instance. - RunEvents handler lives in the CLI layer and is not reusable; the test mirrors its contract (reject on bus events) to avoid importing CLI wiring into session tests. Verified: bun typecheck pass; 2/2 tests pass in 20s. Per plan docs/superpowers/plans/2026-04-18-subagent-hang-hardening.md:1246-1256. --- .../session/subagent-hang-regression.test.ts | 436 ++++++++++++++++-- 1 file changed, 407 insertions(+), 29 deletions(-) diff --git a/packages/opencode/test/session/subagent-hang-regression.test.ts b/packages/opencode/test/session/subagent-hang-regression.test.ts index c17f96b431ff..de8a6cef6be5 100644 --- a/packages/opencode/test/session/subagent-hang-regression.test.ts +++ b/packages/opencode/test/session/subagent-hang-regression.test.ts @@ -1,34 +1,412 @@ -import { describe } from "bun:test" -import { Effect, Layer } from "effect" +// Phase C regression gates for the subagent-hang hardening effort. +// +// Two failure modes this file pins down: +// +// 1. SSE stall = indefinite hang. If a provider starts a response and then +// stops sending chunks, the loop used to block forever. Phase A wrapped +// SSE bodies with `wrapSSE`, which raises `SSEStallError` on inter-chunk +// timeout. `SSEStallError` is classified transport-retryable by +// `SessionRetry.retryable` (see src/session/retry.ts:25) so the +// processor's `Effect.retry(SessionRetry.policy(...))` observes it, +// calls `SessionStatus.set({ type: "retry", ... })`, then backs off. +// The `session.error` bus event only fires AFTER retries are exhausted +// (5 transport attempts, 2+4+8+16+30s = 60s of backoff). This test +// therefore gates on the retry transition — if the stall surfaced as +// a terminal error instead, or hung indefinitely without triggering +// retry, this test fails fast. 
+// +// 2. Subagent question in headless run = deadlock. A subagent that invokes +// the `question` tool publishes `question.asked` and awaits an answer. +// In `opencode run` (headless) there is no interactive client, so +// Phase B added `RunEvents` which subscribes to the Bus and auto-rejects +// descendant questions/permissions. Without that handler the loop +// never returns. RunEvents lives in the CLI layer (see +// `src/cli/cmd/run-events.ts` + `src/cli/cmd/run.ts`); it is NOT wired +// into `SessionPrompt.loop` directly. This test therefore drives the +// loop directly and mounts an in-test subscriber that mirrors the +// RunEvents contract (reject descendant questions, reject permissions). +// That still pins the end-to-end contract — if the Bus events are no +// longer published, or Question.reject no longer unblocks the tool, or +// the task-tool flow no longer propagates subagent completion back to +// the parent, the test fails. +// +// Any change that makes either assertion fail is a regression. 
+ +import { NodeFileSystem } from "@effect/platform-node" +import { FetchHttpClient } from "effect/unstable/http" +import { expect } from "bun:test" +import { Effect, Fiber, Layer } from "effect" +import { Agent as AgentSvc } from "../../src/agent/agent" +import { Bus } from "../../src/bus" +import { Command } from "../../src/command" +import { Config } from "../../src/config" +import { LSP } from "../../src/lsp" +import { MCP } from "../../src/mcp" +import { Permission } from "../../src/permission" +import { Plugin } from "../../src/plugin" +import { Provider as ProviderSvc } from "../../src/provider" +import { Env } from "../../src/env" +import { ModelID, ProviderID } from "../../src/provider/schema" +import { Question } from "../../src/question" +import { Todo } from "../../src/session/todo" +import { Session } from "../../src/session" +import { LLM } from "../../src/session/llm" +import { AppFileSystem } from "@opencode-ai/shared/filesystem" +import { SessionCompaction } from "../../src/session/compaction" +import { SessionSummary } from "../../src/session/summary" +import { Instruction } from "../../src/session/instruction" +import { SessionProcessor } from "../../src/session/processor" +import { SessionPrompt } from "../../src/session/prompt" +import { SessionRevert } from "../../src/session/revert" +import { SessionRunState } from "../../src/session/run-state" +import { MessageID, PartID, SessionID } from "../../src/session/schema" +import { SessionStatus } from "../../src/session/status" +import { Skill } from "../../src/skill" +import { SystemPrompt } from "../../src/session/system" +import { Snapshot } from "../../src/snapshot" +import { ToolRegistry } from "../../src/tool" +import { Truncate } from "../../src/tool" +import { Log } from "../../src/util" +import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner" +import { Ripgrep } from "../../src/file/ripgrep" +import { Format } from "../../src/format" +import { provideTmpdirServer } from 
"../fixture/fixture" import { testEffect } from "../lib/effect" +import { reply, TestLLMServer } from "../lib/llm-server" -// Phase C regression gate — any future change that reintroduces either failure -// mode (SSE stall = indefinite hang; subagent question in headless run = -// deadlock) will fail these tests. -// -// Step 1: scaffold with `.skip`. Step 2 (next commit) fleshes out bodies using -// the test LLM server's `hang` helper and the Phase B auto-reject wiring. - -const it = testEffect(Layer.empty) - -describe("subagent hang regression", () => { - it.live.skip("SSE stall triggers retry, not indefinite hang", () => - Effect.gen(function* () { - // TODO(phase-c-2a): drive a stalling provider through SessionPrompt.loop. - // - provideTmpdirServer with llm.hang queued as first reply - // - configure chunkTimeout short enough to trip within test budget - // - assert session reaches session.error within 2x chunkTimeout - // - assert SessionStatus.set was called with attempt >= 1 - }), - ) +void Log.init({ print: false }) + +const summary = Layer.succeed( + SessionSummary.Service, + SessionSummary.Service.of({ + summarize: () => Effect.void, + diff: () => Effect.succeed([]), + computeDiff: () => Effect.succeed([]), + }), +) - it.live.skip("subagent question in headless run does not deadlock", () => - Effect.gen(function* () { - // TODO(phase-c-2b): drive a provider that steers agent -> task -> subagent - // -> question in a single turn. 
- // - assert status.idle reached within N seconds (not indefinite) - // - assert tool-output contains the auto-rejection message from the - // Phase B handler (RunEvents auto-rejects descendant questions) - }), +const ref = { + providerID: ProviderID.make("test"), + modelID: ModelID.make("test-model"), +} + +const mcp = Layer.succeed( + MCP.Service, + MCP.Service.of({ + status: () => Effect.succeed({}), + clients: () => Effect.succeed({}), + tools: () => Effect.succeed({}), + prompts: () => Effect.succeed({}), + resources: () => Effect.succeed({}), + add: () => Effect.succeed({ status: { status: "disabled" as const } }), + connect: () => Effect.void, + disconnect: () => Effect.void, + getPrompt: () => Effect.succeed(undefined), + readResource: () => Effect.succeed(undefined), + startAuth: () => Effect.die("unexpected MCP auth in regression tests"), + authenticate: () => Effect.die("unexpected MCP auth in regression tests"), + finishAuth: () => Effect.die("unexpected MCP auth in regression tests"), + removeAuth: () => Effect.void, + supportsOAuth: () => Effect.succeed(false), + hasStoredTokens: () => Effect.succeed(false), + getAuthStatus: () => Effect.succeed("not_authenticated" as const), + }), +) + +const lsp = Layer.succeed( + LSP.Service, + LSP.Service.of({ + init: () => Effect.void, + status: () => Effect.succeed([]), + hasClients: () => Effect.succeed(false), + touchFile: () => Effect.void, + diagnostics: () => Effect.succeed({}), + hover: () => Effect.succeed(undefined), + definition: () => Effect.succeed([]), + references: () => Effect.succeed([]), + implementation: () => Effect.succeed([]), + documentSymbol: () => Effect.succeed([]), + workspaceSymbol: () => Effect.succeed([]), + prepareCallHierarchy: () => Effect.succeed([]), + incomingCalls: () => Effect.succeed([]), + outgoingCalls: () => Effect.succeed([]), + }), +) + +const status = SessionStatus.layer.pipe(Layer.provideMerge(Bus.layer)) +const run = SessionRunState.layer.pipe(Layer.provide(status)) 
+const infra = Layer.mergeAll(NodeFileSystem.layer, CrossSpawnSpawner.defaultLayer) + +// Copied verbatim from `prompt-effect.test.ts` — that file exports nothing, +// so we can't import the helper. Keeping the composition identical guarantees +// this regression gate exercises the same service wiring the rest of the +// loop tests do (real Session/SessionPrompt/ToolRegistry/Question/Permission, +// stubbed Summary/MCP/LSP). +function makeHttp() { + const deps = Layer.mergeAll( + Session.defaultLayer, + Snapshot.defaultLayer, + LLM.defaultLayer, + Env.defaultLayer, + AgentSvc.defaultLayer, + Command.defaultLayer, + Permission.defaultLayer, + Plugin.defaultLayer, + Config.defaultLayer, + ProviderSvc.defaultLayer, + lsp, + mcp, + AppFileSystem.defaultLayer, + status, + ).pipe(Layer.provideMerge(infra)) + const question = Question.layer.pipe(Layer.provideMerge(deps)) + const todo = Todo.layer.pipe(Layer.provideMerge(deps)) + const registry = ToolRegistry.layer.pipe( + Layer.provide(Skill.defaultLayer), + Layer.provide(FetchHttpClient.layer), + Layer.provide(CrossSpawnSpawner.defaultLayer), + Layer.provide(Ripgrep.defaultLayer), + Layer.provide(Format.defaultLayer), + Layer.provideMerge(todo), + Layer.provideMerge(question), + Layer.provideMerge(deps), ) + const trunc = Truncate.layer.pipe(Layer.provideMerge(deps)) + const proc = SessionProcessor.layer.pipe(Layer.provide(summary), Layer.provideMerge(deps)) + const compact = SessionCompaction.layer.pipe(Layer.provideMerge(proc), Layer.provideMerge(deps)) + return Layer.mergeAll( + TestLLMServer.layer, + SessionPrompt.layer.pipe( + Layer.provide(SessionRevert.defaultLayer), + Layer.provide(summary), + Layer.provideMerge(run), + Layer.provideMerge(compact), + Layer.provideMerge(proc), + Layer.provideMerge(registry), + Layer.provideMerge(trunc), + Layer.provide(Instruction.defaultLayer), + Layer.provide(SystemPrompt.defaultLayer), + Layer.provideMerge(deps), + ), + ).pipe(Layer.provide(summary)) +} + +const it = 
testEffect(makeHttp()) + +// Provider config matching `prompt-effect.test.ts` but with an aggressively +// short chunkTimeout so Test A surfaces `SSEStallError` within the 8s budget +// instead of the production default (120s / 600s). +function providerCfg(url: string, chunkTimeout?: number) { + return { + provider: { + test: { + name: "Test", + id: "test", + env: [], + npm: "@ai-sdk/openai-compatible", + models: { + "test-model": { + id: "test-model", + name: "Test Model", + attachment: false, + reasoning: false, + temperature: false, + tool_call: true, + release_date: "2025-01-01", + limit: { context: 100000, output: 10000 }, + cost: { input: 0, output: 0 }, + options: {}, + }, + }, + options: { + apiKey: "test-key", + baseURL: url, + ...(chunkTimeout !== undefined ? { chunkTimeout } : {}), + }, + }, + }, + } +} + +const user = Effect.fn("regression.user")(function* (sessionID: SessionID, text: string) { + const session = yield* Session.Service + const msg = yield* session.updateMessage({ + id: MessageID.ascending(), + role: "user", + sessionID, + agent: "build", + model: ref, + time: { created: Date.now() }, + }) + yield* session.updatePart({ + id: PartID.ascending(), + messageID: msg.id, + sessionID, + type: "text", + text, + }) + return msg }) + +it.live( + "SSE stall triggers retry, not indefinite hang", + () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const sessionStatus = yield* SessionStatus.Service + + // Queue an SSE reply that opens the stream (role chunk) then never + // sends another frame. With chunkTimeout=1000ms the loop's wrapSSE + // fires SSEStallError after ~1s, which the retry schedule catches + // and converts into a status transition. 
+ yield* llm.push(reply().hang().item()) + + const chat = yield* sessions.create({ + title: "SSE stall", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* user(chat.id, "trigger stall") + + const fiber = yield* prompt.loop({ sessionID: chat.id }).pipe(Effect.forkChild) + + // Bounded wait for the retry transition. Budget covers: first + // setup pass (cold provider state, models.dev load), one chunk + // timeout (1s), plus schedule classification. If the fiber never + // transitions to retry, the hang regression is back. + const observed = yield* Effect.promise(async () => { + const end = Date.now() + 8_000 + while (Date.now() < end) { + const exit = fiber.pollUnsafe() + if (exit) { + throw new Error(`loop exited before retry observed: ${JSON.stringify(exit)}`) + } + const snap = await Effect.runPromise(sessionStatus.get(chat.id)) + if (snap.type === "retry") return snap + await new Promise((done) => setTimeout(done, 25)) + } + const snap = await Effect.runPromise(sessionStatus.get(chat.id)) + throw new Error(`expected retry status within 8s; last status: ${JSON.stringify(snap)}`) + }) + + expect(observed.type).toBe("retry") + expect(observed.attempt).toBeGreaterThanOrEqual(1) + // SessionRetry.transportMessage populates the retry message from + // SSEStallError.data.message ("SSE read timed out after 1000ms"). + expect(observed.message).toMatch(/SSE|timed out/i) + + // Stop the loop before the 2s exponential backoff fires a second + // attempt (and another 1s stall) and blows the 20s test budget. 
+ yield* prompt.cancel(chat.id) + yield* Fiber.await(fiber) + }), + { git: true, config: (url) => providerCfg(url, 1_000) }, + ), + 20_000, +) + +it.live( + "subagent question in headless run does not deadlock", + () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const bus = yield* Bus.Service + const question = yield* Question.Service + const permission = yield* Permission.Service + const sessionStatus = yield* SessionStatus.Service + + // Reply 1 (root): dispatch the task tool to spawn a subagent. + yield* llm.tool("task", { + description: "ask the user", + prompt: "use the question tool to ask the user", + subagent_type: "general", + }) + // Reply 2 (subagent): call the question tool. Our bus subscriber + // mirrors the RunEvents contract and rejects this question, which + // unblocks the subagent's question tool with RejectedError. + yield* llm.tool("question", { + questions: [ + { + question: "proceed?", + header: "confirm", + options: [ + { label: "yes", description: "go" }, + { label: "no", description: "stop" }, + ], + }, + ], + }) + // After question rejection the subagent's next call plus the root's + // follow-up call fall through to the server's auto "ok"/stop + // response, so no more queue entries are required. + + const chat = yield* sessions.create({ + title: "Subagent question", + // Allow task + subagent. The question tool will fire regardless of + // permission rules because the ask() path inside the tool publishes + // `question.asked` directly. Allow-all keeps the focus on the + // deadlock contract. + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* user(chat.id, "please ask something") + + // Mirror of RunEvents.make semantics (see src/cli/cmd/run-events.ts): + // reject any question or permission raised on a descendant of the + // root session. 
This test is a single root with one subagent, so we + // reject indiscriminately — the production handler does parent-chain + // lineage checks which are orthogonal to the hang contract. + let questionsRejected = 0 + const unsubQuestion = yield* bus.subscribeCallback(Question.Event.Asked, (event) => + Effect.runPromise( + Effect.gen(function* () { + questionsRejected += 1 + yield* question.reject(event.properties.id) + }), + ), + ) + const unsubPermission = yield* bus.subscribeCallback(Permission.Event.Asked, (event) => + Effect.runPromise( + Effect.gen(function* () { + yield* permission.reply({ requestID: event.properties.id, reply: "reject" }) + }), + ), + ) + yield* Effect.addFinalizer(() => + Effect.sync(() => { + unsubQuestion() + unsubPermission() + }), + ) + + const fiber = yield* prompt.loop({ sessionID: chat.id }).pipe(Effect.forkChild) + + // Primary gate: the root fiber must complete in bounded time. If the + // subagent's question tool were left blocked on an unanswered + // deferred, this poll would never see the fiber finish. 10s upper + // bound — the happy-path finish is well under a second. + yield* Effect.promise(async () => { + const end = Date.now() + 10_000 + while (Date.now() < end) { + if (fiber.pollUnsafe()) return + await new Promise((done) => setTimeout(done, 25)) + } + throw new Error("root loop did not complete within 10s — subagent question likely deadlocked") + }) + + // Fiber completed. The subagent's question tool should have been + // rejected at least once — that is the whole Phase B contract under + // test. + expect(questionsRejected).toBeGreaterThanOrEqual(1) + // And the root session should settle idle (not stuck busy). 
+ const finalStatus = yield* sessionStatus.get(chat.id) + expect(finalStatus.type).toBe("idle") + + yield* Fiber.await(fiber) + }), + { git: true, config: (url) => ({ ...providerCfg(url), agent: { general: { permission: { question: "allow" } } } }) }, + ), + 15_000, +) From 9e6ab7690897d22553ff69a91956a09dc7434a87 Mon Sep 17 00:00:00 2001 From: Helge Tesdal Date: Wed, 22 Apr 2026 09:57:45 +0200 Subject: [PATCH 3/5] test(phase-c): address codex review blockers - Test B: replace poll+await pattern with Fiber.await+timeoutOrElse+Exit.isSuccess to surface defect paths instead of treating poll truthy as success. - Test B: assert RejectedError propagates to the question tool's error output by walking root + child sessions and matching /dismissed/i. - Compress header comment (31 lines -> 9). - Replace `function* ({ llm })` destructuring with `function* (input)` + input.llm. --- .../session/subagent-hang-regression.test.ts | 102 +++++++++--------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/packages/opencode/test/session/subagent-hang-regression.test.ts b/packages/opencode/test/session/subagent-hang-regression.test.ts index de8a6cef6be5..1e4e6d4666b4 100644 --- a/packages/opencode/test/session/subagent-hang-regression.test.ts +++ b/packages/opencode/test/session/subagent-hang-regression.test.ts @@ -1,41 +1,17 @@ // Phase C regression gates for the subagent-hang hardening effort. // -// Two failure modes this file pins down: -// -// 1. SSE stall = indefinite hang. If a provider starts a response and then -// stops sending chunks, the loop used to block forever. Phase A wrapped -// SSE bodies with `wrapSSE`, which raises `SSEStallError` on inter-chunk -// timeout. `SSEStallError` is classified transport-retryable by -// `SessionRetry.retryable` (see src/session/retry.ts:25) so the -// processor's `Effect.retry(SessionRetry.policy(...))` observes it, -// calls `SessionStatus.set({ type: "retry", ... })`, then backs off. 
-// The `session.error` bus event only fires AFTER retries are exhausted -// (5 transport attempts, 2+4+8+16+30s = 60s of backoff). This test -// therefore gates on the retry transition — if the stall surfaced as -// a terminal error instead, or hung indefinitely without triggering -// retry, this test fails fast. -// -// 2. Subagent question in headless run = deadlock. A subagent that invokes -// the `question` tool publishes `question.asked` and awaits an answer. -// In `opencode run` (headless) there is no interactive client, so -// Phase B added `RunEvents` which subscribes to the Bus and auto-rejects -// descendant questions/permissions. Without that handler the loop -// never returns. RunEvents lives in the CLI layer (see -// `src/cli/cmd/run-events.ts` + `src/cli/cmd/run.ts`); it is NOT wired -// into `SessionPrompt.loop` directly. This test therefore drives the -// loop directly and mounts an in-test subscriber that mirrors the -// RunEvents contract (reject descendant questions, reject permissions). -// That still pins the end-to-end contract — if the Bus events are no -// longer published, or Question.reject no longer unblocks the tool, or -// the task-tool flow no longer propagates subagent completion back to -// the parent, the test fails. -// -// Any change that makes either assertion fail is a regression. +// 1. SSE stall: Phase A's wrapSSE must convert a stalled stream into +// SSEStallError, which SessionRetry classifies as transport-retryable +// and surfaces as a `retry` SessionStatus. Gates against indefinite hangs. +// 2. Subagent question in headless: Phase B's Question→Bus publish + +// Question.reject→Deferred.fail contract must allow an external +// subscriber (mirroring RunEvents) to unblock a subagent question tool. +// Gates against headless deadlock when the user can't answer. 
import { NodeFileSystem } from "@effect/platform-node" import { FetchHttpClient } from "effect/unstable/http" import { expect } from "bun:test" -import { Effect, Fiber, Layer } from "effect" +import { Effect, Exit, Fiber, Layer } from "effect" import { Agent as AgentSvc } from "../../src/agent/agent" import { Bus } from "../../src/bus" import { Command } from "../../src/command" @@ -252,7 +228,7 @@ it.live( "SSE stall triggers retry, not indefinite hang", () => provideTmpdirServer( - Effect.fnUntraced(function* ({ llm }) { + Effect.fnUntraced(function* (input) { const prompt = yield* SessionPrompt.Service const sessions = yield* Session.Service const sessionStatus = yield* SessionStatus.Service @@ -261,7 +237,7 @@ it.live( // sends another frame. With chunkTimeout=1000ms the loop's wrapSSE // fires SSEStallError after ~1s, which the retry schedule catches // and converts into a status transition. - yield* llm.push(reply().hang().item()) + yield* input.llm.push(reply().hang().item()) const chat = yield* sessions.create({ title: "SSE stall", @@ -310,7 +286,7 @@ it.live( "subagent question in headless run does not deadlock", () => provideTmpdirServer( - Effect.fnUntraced(function* ({ llm }) { + Effect.fnUntraced(function* (input) { const prompt = yield* SessionPrompt.Service const sessions = yield* Session.Service const bus = yield* Bus.Service @@ -319,7 +295,7 @@ it.live( const sessionStatus = yield* SessionStatus.Service // Reply 1 (root): dispatch the task tool to spawn a subagent. - yield* llm.tool("task", { + yield* input.llm.tool("task", { description: "ask the user", prompt: "use the question tool to ask the user", subagent_type: "general", @@ -327,7 +303,7 @@ it.live( // Reply 2 (subagent): call the question tool. Our bus subscriber // mirrors the RunEvents contract and rejects this question, which // unblocks the subagent's question tool with RejectedError. 
- yield* llm.tool("question", { + yield* input.llm.tool("question", { questions: [ { question: "proceed?", @@ -383,28 +359,46 @@ it.live( const fiber = yield* prompt.loop({ sessionID: chat.id }).pipe(Effect.forkChild) - // Primary gate: the root fiber must complete in bounded time. If the - // subagent's question tool were left blocked on an unanswered - // deferred, this poll would never see the fiber finish. 10s upper - // bound — the happy-path finish is well under a second. - yield* Effect.promise(async () => { - const end = Date.now() + 10_000 - while (Date.now() < end) { - if (fiber.pollUnsafe()) return - await new Promise((done) => setTimeout(done, 25)) - } - throw new Error("root loop did not complete within 10s — subagent question likely deadlocked") - }) + // Primary gate: the root fiber must complete in bounded time and + // succeed. Join under a 10s timeout that fails with a clear message + // if the loop hangs. Exit check guards against silent defect paths — + // a passing `pollUnsafe()` truthy check could miss these. + const exit = yield* Fiber.await(fiber).pipe( + Effect.timeoutOrElse({ + duration: "10 seconds", + orElse: () => Effect.die(new Error("root loop did not complete within 10s — subagent question likely deadlocked")), + }), + ) + expect(Exit.isSuccess(exit)).toBe(true) - // Fiber completed. The subagent's question tool should have been - // rejected at least once — that is the whole Phase B contract under - // test. + // Phase B contract: the subagent's question tool must have been + // rejected at least once via the bus subscriber. expect(questionsRejected).toBeGreaterThanOrEqual(1) + + // The rejection must propagate into the subagent's tool output so + // the parent (task tool) sees the failure. Walk the root + child + // sessions and locate the question tool part — it must be in error + // state with the RejectedError message. 
+ const children = yield* sessions.children(chat.id) + const allSessionIDs = [chat.id, ...children.map((c) => c.id)] + const questionErrors: string[] = [] + for (const sid of allSessionIDs) { + const messages = yield* sessions.messages({ sessionID: sid }) + for (const msg of messages) { + for (const part of msg.parts) { + if (part.type === "tool" && part.tool === "question" && part.state.status === "error") { + questionErrors.push(part.state.error) + } + } + } + } + expect(questionErrors.length).toBeGreaterThanOrEqual(1) + // Question.RejectedError.message => "The user dismissed this question". + expect(questionErrors.some((e) => /dismissed/i.test(e))).toBe(true) + // And the root session should settle idle (not stuck busy). const finalStatus = yield* sessionStatus.get(chat.id) expect(finalStatus.type).toBe("idle") - - yield* Fiber.await(fiber) }), { git: true, config: (url) => ({ ...providerCfg(url), agent: { general: { permission: { question: "allow" } } } }) }, ), From 6254b3b268c362734cb61f01952b41ecfb7e8747 Mon Sep 17 00:00:00 2001 From: Helge Tesdal Date: Wed, 22 Apr 2026 10:41:20 +0200 Subject: [PATCH 4/5] test(phase-c): use Effect.acquireRelease for dual subscription cleanup Addresses non-blocking review feedback. Replaces paired subscribeCallback + Effect.addFinalizer pattern with Effect.acquireRelease so acquire and release are colocated and scope-bound atomically. 
--- .../session/subagent-hang-regression.test.ts | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/packages/opencode/test/session/subagent-hang-regression.test.ts b/packages/opencode/test/session/subagent-hang-regression.test.ts index 1e4e6d4666b4..db45289e8806 100644 --- a/packages/opencode/test/session/subagent-hang-regression.test.ts +++ b/packages/opencode/test/session/subagent-hang-regression.test.ts @@ -335,26 +335,30 @@ it.live( // reject indiscriminately — the production handler does parent-chain // lineage checks which are orthogonal to the hang contract. let questionsRejected = 0 - const unsubQuestion = yield* bus.subscribeCallback(Question.Event.Asked, (event) => - Effect.runPromise( - Effect.gen(function* () { - questionsRejected += 1 - yield* question.reject(event.properties.id) - }), - ), - ) - const unsubPermission = yield* bus.subscribeCallback(Permission.Event.Asked, (event) => - Effect.runPromise( - Effect.gen(function* () { - yield* permission.reply({ requestID: event.properties.id, reply: "reject" }) - }), - ), - ) - yield* Effect.addFinalizer(() => - Effect.sync(() => { - unsubQuestion() - unsubPermission() + yield* Effect.acquireRelease( + Effect.gen(function* () { + const unsubQuestion = yield* bus.subscribeCallback(Question.Event.Asked, (event) => + Effect.runPromise( + Effect.gen(function* () { + questionsRejected += 1 + yield* question.reject(event.properties.id) + }), + ), + ) + const unsubPermission = yield* bus.subscribeCallback(Permission.Event.Asked, (event) => + Effect.runPromise( + Effect.gen(function* () { + yield* permission.reply({ requestID: event.properties.id, reply: "reject" }) + }), + ), + ) + return { unsubQuestion, unsubPermission } }), + (handles) => + Effect.sync(() => { + handles.unsubQuestion() + handles.unsubPermission() + }), ) const fiber = yield* prompt.loop({ sessionID: chat.id }).pipe(Effect.forkChild) From 69ff8abcb7a2cdf55c52043220507791f9fb6b23 Mon Sep 17 00:00:00 2001 
From: Helge Tesdal Date: Wed, 22 Apr 2026 10:54:34 +0200 Subject: [PATCH 5/5] test(phase-c): move Test A polling into Effect, drop nested runPromise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Copilot PR review (partial). Keeps `fiber.pollUnsafe()` — it is public API and the correct synchronous peek for early-exit detection — but lifts the poll loop out of `Effect.promise` and into `Effect.gen` so the `sessionStatus.get` call no longer needs a nested `Effect.runPromise`. Copilot's suggested `Effect.raceFirst(Effect.fnUntraced(...), ...)` snippet does not typecheck (`fnUntraced` returns a function, not an Effect), so the literal suggestion was not applied. --- .../session/subagent-hang-regression.test.ts | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/packages/opencode/test/session/subagent-hang-regression.test.ts b/packages/opencode/test/session/subagent-hang-regression.test.ts index db45289e8806..6b3b59013bd6 100644 --- a/packages/opencode/test/session/subagent-hang-regression.test.ts +++ b/packages/opencode/test/session/subagent-hang-regression.test.ts @@ -250,20 +250,23 @@ it.live( // Bounded wait for the retry transition. Budget covers: first // setup pass (cold provider state, models.dev load), one chunk // timeout (1s), plus schedule classification. If the fiber never - // transitions to retry, the hang regression is back. - const observed = yield* Effect.promise(async () => { + // transitions to retry, the hang regression is back. `pollUnsafe` is + // the public synchronous-peek API — we use it to short-circuit if + // the fiber dies early so the error cause surfaces, rather than + // timing out blindly at 8s. 
+ const observed = yield* Effect.gen(function* () { const end = Date.now() + 8_000 while (Date.now() < end) { const exit = fiber.pollUnsafe() - if (exit) { - throw new Error(`loop exited before retry observed: ${JSON.stringify(exit)}`) - } - const snap = await Effect.runPromise(sessionStatus.get(chat.id)) + if (exit) return yield* Effect.fail(new Error(`loop exited before retry observed: ${JSON.stringify(exit)}`)) + const snap = yield* sessionStatus.get(chat.id) if (snap.type === "retry") return snap - await new Promise((done) => setTimeout(done, 25)) + yield* Effect.sleep("25 millis") } - const snap = await Effect.runPromise(sessionStatus.get(chat.id)) - throw new Error(`expected retry status within 8s; last status: ${JSON.stringify(snap)}`) + const snap = yield* sessionStatus.get(chat.id) + return yield* Effect.fail( + new Error(`expected retry status within 8s; last status: ${JSON.stringify(snap)}`), + ) }) expect(observed.type).toBe("retry")