diff --git a/src/agent/__tests__/session-store.test.ts b/src/agent/__tests__/session-store.test.ts
index ff53e53..4484c15 100644
--- a/src/agent/__tests__/session-store.test.ts
+++ b/src/agent/__tests__/session-store.test.ts
@@ -73,6 +73,29 @@ describe("SessionStore", () => {
 		expect(session?.status).toBe("active");
 	});
 
+	test("clearAllSdkSessionIds clears every stale SDK ID", () => {
+		store.create("cli", "conv-1");
+		store.create("slack", "conv-2");
+		store.create("web", "conv-3");
+
+		store.updateSdkSessionId("cli:conv-1", "sdk-aaa");
+		store.updateSdkSessionId("slack:conv-2", "sdk-bbb");
+		// web:conv-3 is never given an SDK session ID, so only two rows should be counted
+
+		const cleared = store.clearAllSdkSessionIds();
+		expect(cleared).toBe(2);
+
+		expect(store.getByKey("cli:conv-1")?.sdk_session_id).toBeNull();
+		expect(store.getByKey("slack:conv-2")?.sdk_session_id).toBeNull();
+		expect(store.getByKey("web:conv-3")?.sdk_session_id).toBeNull();
+	});
+
+	test("clearAllSdkSessionIds returns 0 when no sessions have SDK IDs", () => {
+		store.create("cli", "conv-1");
+		const cleared = store.clearAllSdkSessionIds();
+		expect(cleared).toBe(0);
+	});
+
 	test("create reactivates an expired session with the same key", () => {
 		store.create("cli", "conv-1");
 		store.updateSdkSessionId("cli:conv-1", "old-sdk-id");
diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts
index d5aa421..c3cfc94 100644
--- a/src/agent/runtime.ts
+++ b/src/agent/runtime.ts
@@ -280,6 +280,22 @@ export class AgentRuntime {
 					resultText = `Error: ${retryMsg}`;
 					onEvent?.({ type: "error", message: retryMsg });
 				}
+			} else if (isResume) {
+				// Any other failure while resuming: treat the stored SDK session as unusable,
+				// clear it, reset the per-attempt state, and retry once without resume. See #25.
+				console.log(`[runtime] Resume failed (${errorMsg}), retrying without resume: ${sessionKey}`);
+				this.sessionStore.clearSdkSessionId(sessionKey);
+				sdkSessionId = "";
+				resultText = "";
+				cost = emptyCost();
+				emittedThinking = false;
+				try {
+					await runSdkQuery(false);
+				} catch (retryErr: unknown) {
+					const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
+					resultText = `Error: ${retryMsg}`;
+					onEvent?.({ type: "error", message: retryMsg });
+				}
 			} else {
 				resultText = `Error: ${errorMsg}`;
 				onEvent?.({ type: "error", message: errorMsg });
diff --git a/src/agent/session-store.ts b/src/agent/session-store.ts
index 88df65d..5cdb873 100644
--- a/src/agent/session-store.ts
+++ b/src/agent/session-store.ts
@@ -77,6 +77,21 @@ export class SessionStore {
 		);
 	}
 
+	/**
+	 * Clear the SDK session ID on every session row that currently has one.
+	 *
+	 * SDK session IDs are process-local and do not survive restarts; stale
+	 * IDs left in SQLite (persisted volume) cause impossible resume attempts
+	 * that deadlock the CLI channel, so clear them on startup. See #25.
+	 * @returns the number of rows whose ID was cleared (0 if none had one).
+	 */
+	clearAllSdkSessionIds(): number {
+		const result = this.db.run(
+			"UPDATE sessions SET sdk_session_id = NULL WHERE sdk_session_id IS NOT NULL",
+		);
+		return result.changes;
+	}
+
 	touch(sessionKey: string): void {
 		this.db.run("UPDATE sessions SET last_active_at = datetime('now') WHERE session_key = ?", [sessionKey]);
 	}
diff --git a/src/index.ts b/src/index.ts
index 50aed7a..c9ce064 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -122,6 +122,14 @@ async function main(): Promise {
 	// agent, which means a single auth path and a single provider switch.
 	const runtime = new AgentRuntime(config, db);
 
+	// SDK session IDs are process-local and never survive restarts. Clear them here
+	// (NOTE(review): same UPDATE as SessionStore.clearAllSdkSessionIds — consider reusing it)
+	// so the runtime does not attempt impossible resumes that deadlock persistent channels. See #25.
+	{
+		const result = db.run("UPDATE sessions SET sdk_session_id = NULL WHERE sdk_session_id IS NOT NULL");
+		if (result.changes > 0) console.log(`[phantom] Cleared ${result.changes} stale SDK session ID(s)`);
+	}
+
 	let evolution: EvolutionEngine | null = null;
 	let evolutionCadence: EvolutionCadence | null = null;
 	try {