From 1963fd87364fff11ec8c9eaedb742f98f253acaf Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 3 Mar 2026 07:07:15 +0000 Subject: [PATCH 1/2] fix: upgrade zod from v3 to v4 Update zod dependency to ^4.3.6 and fix config.ts to use explicit fully-populated default objects for nested schemas, required by Zod v4's changed .default() semantics (short-circuits instead of parsing defaults). --- AGENTS.md | 18 ++++++------------ package.json | 2 +- pnpm-lock.yaml | 14 +++++++------- src/config.ts | 12 ++++++------ 4 files changed, 20 insertions(+), 26 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0fd347d..105fda3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,13 +26,7 @@ ### Gotcha -* **Calibration used DB message count instead of transformed window count — caused layer 0 false passthrough**: Lore gradient calibration bugs that caused context overflow: (1) Used DB message count instead of transformed window count — after compression, delta saw ~1 new msg → layer 0 passthrough → overflow. Fix: getLastTransformedCount(). (2) actualInput omitted cache.write — cold-cache turns showed ~3 tokens instead of 150K → layer 0. Fix: include cache.write. (3) Trailing pure-text assistant messages cause Anthropic prefill errors, but messages with tool parts must NOT be dropped (SDK converts to tool\_result user-role). Drop predicate: \`hasToolParts\`. (4) Don't mutate message parts you don't own — removed stats PATCH that caused system-reminder persistence bug. - - -* **hostapd -t is not a config dry-run — it adds timestamps to debug output**: hostapd v2.10's \`-t\` flag means 'include timestamps in debug messages', NOT syntax check or dry-run. Running \`hostapd -t \\` fully initialises the interface and hangs as a running AP. There is no built-in config validation flag in hostapd. For validation, use grep-based checks for known-bad directives (e.g. checking for ieee80211r when it's not compiled in) rather than invoking hostapd itself. - - -* **Lore plugin only protects projects where it's registered in opencode.json**: The lore gradient transform only runs for projects with lore registered in opencode.json (or globally in ~/.config/opencode/). Projects without it get zero context management — messages accumulate until overflow triggers a stuck compaction loop. This caused a 404K-token overflow in a getsentry/cli session with no opencode.json. +* **Calibration used DB message count instead of transformed window count — caused layer 0 false passthrough**: Lore gradient/context management bugs and fixes: (1) Used DB message count instead of transformed window count — delta ≈ 1 after compression → layer 0 passthrough → overflow. Fix: getLastTransformedCount(). (2) actualInput omitted cache.write — cold-cache showed ~3 tokens → layer 0. Fix: include cache.write. (3) Trailing pure-text assistant messages cause Anthropic prefill errors. Drop loop must run at ALL layers including 0 — at layer 0 result.messages === output.messages (same ref), so pop() trims in place. Messages with tool parts must NOT be dropped (hasToolParts) — dropping causes infinite tool-call loops. (4) Lore only protects projects registered in opencode.json — unregistered projects get zero context management → stuck compaction loops creating orphaned message pairs. Recovery: delete all messages after last good assistant message (has tokens, no error). * **mt7921e 3dBm tx power on desktop — disable CLC firmware table**: mt7921e/mt7922 PCIe WiFi cards in desktop PCs (no ACPI SAR tables like WRDS/EWRD) get stuck at ~3 dBm tx power because the CLC (Country Location Code) firmware power lookup falls back to a conservative default when no SAR table exists. Fix: set \`options mt7921\_common disable\_clc=1\` in /etc/modprobe.d/mt7921.conf. This lets the regulatory domain ceiling apply (e.g. 23 dBm on 5GHz ch44 in GB). Also set explicit tx power via \`iw dev \ set txpower fixed 2000\` in ExecStartPost since the module param only takes effect on next module load/reboot. @@ -40,17 +34,17 @@ * **Pixel phones fail WPA group key rekey during doze — use 86400s interval**: Android Pixel devices in deep doze/sleep fail to respond to WPA group key handshake frames within hostapd's retry window. With wpa\_group\_rekey=3600, the phone gets deauthenticated every hour ('group key handshake failed (RSN) after 4 tries'). Other devices on the same AP complete the rekey fine. Fix: set wpa\_group\_rekey=86400 (24h) instead of 0 (disabled) for security balance. Also apply to Asus router: nvram set wpa\_gtk\_rekey=86400, wl0\_wpa\_gtk\_rekey=86400, wl1\_wpa\_gtk\_rekey=86400. - -* **Stuck compaction loops leave orphaned user+assistant message pairs in DB**: When OpenCode compaction overflows, it creates paired user+assistant messages per retry (assistant has error.name:'ContextOverflowError', mode:'compaction'). These accumulate and worsen the session. Recovery: find last good assistant message (has tokens, no error), delete all messages after it from both \`message\` and \`part\` tables. Use json\_extract(data, '$.error.name') to identify compaction debris. - * **sudo changes $HOME to /root — hardcode user home in scripts run with sudo**: When running a script with \`sudo\`, \`$HOME\` resolves to \`/root\`, not the invoking user's home. SSH key paths like \`$HOME/.ssh/id\_ed25519\` break. Fix: use \`SUDO\_USER\` env var: \`USER\_HOME=$(eval echo ~${SUDO\_USER:-$USER})\` and reference \`$USER\_HOME/.ssh/id\_ed25519\`. This is a common trap in scripts that need both root privileges (systemctl, writing to /etc) and user-specific resources (SSH keys). -* **Test DB isolation via LORE\_DB\_PATH and Bun test preload**: Lore test suite uses an isolated temp DB via test/setup.ts preload (bunfig.toml). The preload sets LORE\_DB\_PATH to a mkdtempSync path before any test file imports src/db.ts, and the afterAll cleans up. src/db.ts checks LORE\_DB\_PATH first — if set, uses that exact path instead of ~/.local/share/opencode-lore/lore.db. agents-file.test.ts still needs beforeEach cleanup for intra-file isolation and TEST\_UUIDS cleanup in afterAll (shared explicit UUIDs with ltm.test.ts). Individual test files no longer need close() calls or cross-run cleanup beforeAll blocks — the preload handles DB lifecycle. +* **Test DB isolation via LORE\_DB\_PATH and Bun test preload**: Lore test suite uses isolated temp DB via test/setup.ts preload (bunfig.toml). Preload sets LORE\_DB\_PATH to mkdtempSync path before any imports of src/db.ts; afterAll cleans up. src/db.ts checks LORE\_DB\_PATH first. agents-file.test.ts needs beforeEach cleanup for intra-file isolation and TEST\_UUIDS cleanup in afterAll (shared with ltm.test.ts). Individual test files don't need close() calls — preload handles DB lifecycle. -* **Ubuntu packaged hostapd lacks 802.11r (CONFIG\_IEEE80211R not compiled)**: Ubuntu 24.04's hostapd package (2:2.10-21ubuntu0.x) is compiled without CONFIG\_IEEE80211R. Using \`ieee80211r=1\`, \`mobility\_domain\`, \`ft\_over\_ds\`, \`r0kh\`, \`r1kh\`, or \`FT-PSK\` in wpa\_key\_mgmt causes 'unknown configuration item' errors and hostapd fails to start. 802.11k (rrm\_neighbor\_report, rrm\_beacon\_report) and 802.11v (bss\_transition) ARE compiled in and work. Verify with \`strings /usr/sbin/hostapd | grep ieee80211r\` — absence confirms no FT support. Building from source with CONFIG\_IEEE80211R=y is the only workaround. +* **Ubuntu packaged hostapd lacks 802.11r (CONFIG\_IEEE80211R not compiled)**: Ubuntu 24.04 hostapd (2:2.10-21ubuntu0.x) lacks CONFIG\_IEEE80211R. Using \`ieee80211r=1\`, \`mobility\_domain\`, \`FT-PSK\` etc. causes 'unknown configuration item' and fails to start. 802.11k/v directives ARE compiled in. Verify: \`strings /usr/sbin/hostapd | grep ieee80211r\` — absence confirms no FT support. Build from source with CONFIG\_IEEE80211R=y. Note: hostapd has NO config dry-run flag — \`-t\` just adds timestamps to debug output and fully starts the AP. Use grep-based validation for known-bad directives instead. + + +* **Zod v4 .default({}) no longer applies inner field defaults**: Zod v4 changed \`.default()\` to short-circuit: when input is \`undefined\`, it returns the default value directly without parsing it through inner schema defaults. So \`.object({ enabled: z.boolean().default(true) }).default({})\` returns \`{}\` (no \`enabled\` key), not \`{ enabled: true }\`. Fix: provide fully-populated default objects — \`.default({ enabled: true })\`. This affected all nested config sections in src/config.ts during the v3→v4 upgrade. The import \`import { z } from "zod"\` is unchanged — Zod 4's main entry point is the v4 API. ### Pattern diff --git a/package.json b/package.json index fefe1ab..cf4ea56 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "dependencies": { "remark": "^15.0.1", "uuidv7": "^1.1.0", - "zod": "^3.25.0" + "zod": "^4.3.6" }, "devDependencies": { "@opencode-ai/plugin": "^1.1.39", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 930324e..c6e1c41 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,8 +15,8 @@ importers: specifier: ^1.1.0 version: 1.1.0 zod: - specifier: ^3.25.0 - version: 3.25.76 + specifier: ^4.3.6 + version: 4.3.6 devDependencies: '@opencode-ai/plugin': specifier: ^1.1.39 @@ -228,12 +228,12 @@ packages: vfile@6.0.3: resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} - zod@3.25.76: - resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} - zod@4.1.8: resolution: {integrity: sha512-5R1P+WwQqmmMIEACyzSvo4JXHY5WiAFHRMg+zBZKgKS+Q1viRa0C1hmUKtHltoIFKtIdki3pRxkmpP74jnNYHQ==} + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} + zwitch@2.0.4: resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==} @@ -544,8 +544,8 @@ snapshots: '@types/unist': 3.0.3 vfile-message: 4.0.3 - zod@3.25.76: {} - zod@4.1.8: {} + zod@4.3.6: {} + zwitch@2.0.4: {} diff --git a/src/config.ts b/src/config.ts index 110af1e..55808c2 100644 --- a/src/config.ts +++ b/src/config.ts @@ -15,14 +15,14 @@ export const LoreConfig = z.object({ /** Max fraction of usable context reserved for LTM system-prompt injection. Default: 0.10 (10%). */ ltm: z.number().min(0.02).max(0.3).default(0.10), }) - .default({}), + .default({ distilled: 0.25, raw: 0.4, output: 0.25, ltm: 0.10 }), distillation: z .object({ minMessages: z.number().min(3).default(8), maxSegment: z.number().min(5).default(50), metaThreshold: z.number().min(3).default(10), }) - .default({}), + .default({ minMessages: 8, maxSegment: 50, metaThreshold: 10 }), knowledge: z .object({ /** Set to false to disable long-term knowledge storage and system-prompt injection. @@ -32,7 +32,7 @@ export const LoreConfig = z.object({ * system prompt. Default: true. */ enabled: z.boolean().default(true), }) - .default({}), + .default({ enabled: true }), curator: z .object({ enabled: z.boolean().default(true), @@ -41,7 +41,7 @@ export const LoreConfig = z.object({ /** Max knowledge entries per project before consolidation triggers. Default: 25. */ maxEntries: z.number().min(10).default(25), }) - .default({}), + .default({ enabled: true, onIdle: true, afterTurns: 10, maxEntries: 25 }), pruning: z .object({ /** Days to keep distilled temporal messages before pruning. Default: 120. */ @@ -49,7 +49,7 @@ export const LoreConfig = z.object({ /** Max total temporal_messages storage in MB before emergency pruning. Default: 1024 (1 GB). */ maxStorage: z.number().min(50).default(1024), }) - .default({}), + .default({ retention: 120, maxStorage: 1024 }), crossProject: z.boolean().default(true), agentsFile: z .object({ @@ -58,7 +58,7 @@ export const LoreConfig = z.object({ /** Path to the agents file, relative to the project root. */ path: z.string().default("AGENTS.md"), }) - .default({}), + .default({ enabled: true, path: "AGENTS.md" }), }); export type LoreConfig = z.infer; From 0c437646b2bec91051f09986a64e4da55b8cde2b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 3 Mar 2026 23:31:20 +0000 Subject: [PATCH 2/2] fix: prevent excessive background LLM requests causing rate limiting and sluggishness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs identified and fixed: 1. Auto-recovery infinite loop (CRITICAL): When a context overflow error triggered auto-recovery via session.prompt(), if the recovery itself also overflowed, a new session.error fired with no re-entrancy guard, creating an infinite loop of distill+prompt calls (2+ LLM calls per cycle). Fix: add recoveringSessions Set — second overflow for the same session bails out immediately. 2. Curator fires on every session.idle (HIGH): The condition used 'onIdle || turnsSinceCuration >= afterTurns'. Since onIdle defaults to true, the || short-circuits and afterTurns (default: 10) is never checked. The curator fired an LLM worker request after every single agent turn. Fix: change || to && — curate on idle only when enough turns have accumulated. 3. shouldSkip lists all sessions on every unknown message (MEDIUM): When session.get() failed (common with short IDs from message events), the fallback called session.list() fetching ALL sessions on every unknown message event. Fix: remove session.list() fallback, cache sessions as known-good after first check. Worker sessions are already caught by isWorkerSession(). Symptoms these fixes address: - Upstream rate limiting from excessive LLM calls - Slower LLM interactions (curator competing for rate limit budget) - Many 'error' sounds in TUI (each failed recovery wrote to stderr) - 'Prompt too long' errors visible in TUI (recovery loop) - Overall sluggish OpenCode server behavior --- AGENTS.md | 5 +- src/index.ts | 59 ++++--- test/index.test.ts | 410 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 452 insertions(+), 22 deletions(-) create mode 100644 test/index.test.ts diff --git a/AGENTS.md b/AGENTS.md index 105fda3..0ac7731 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,7 +26,7 @@ ### Gotcha -* **Calibration used DB message count instead of transformed window count — caused layer 0 false passthrough**: Lore gradient/context management bugs and fixes: (1) Used DB message count instead of transformed window count — delta ≈ 1 after compression → layer 0 passthrough → overflow. Fix: getLastTransformedCount(). (2) actualInput omitted cache.write — cold-cache showed ~3 tokens → layer 0. Fix: include cache.write. (3) Trailing pure-text assistant messages cause Anthropic prefill errors. Drop loop must run at ALL layers including 0 — at layer 0 result.messages === output.messages (same ref), so pop() trims in place. Messages with tool parts must NOT be dropped (hasToolParts) — dropping causes infinite tool-call loops. (4) Lore only protects projects registered in opencode.json — unregistered projects get zero context management → stuck compaction loops creating orphaned message pairs. Recovery: delete all messages after last good assistant message (has tokens, no error). +* **Calibration used DB message count instead of transformed window count — caused layer 0 false passthrough**: Lore gradient/context bugs: (1) Used DB message count instead of transformed window count — delta ≈ 1 → layer 0 passthrough → overflow. Fix: getLastTransformedCount(). (2) actualInput omitted cache.write — cold-cache ~3 tokens → layer 0. Fix: include cache.write. (3) Trailing pure-text assistant messages cause Anthropic prefill errors. Drop loop must run at ALL layers (layer 0 shares ref with output). Never drop messages with tool parts (hasToolParts) — causes infinite loops. (4) Unregistered projects get zero context management → stuck compaction loops. Recovery: delete messages after last good assistant message. * **mt7921e 3dBm tx power on desktop — disable CLC firmware table**: mt7921e/mt7922 PCIe WiFi cards in desktop PCs (no ACPI SAR tables like WRDS/EWRD) get stuck at ~3 dBm tx power because the CLC (Country Location Code) firmware power lookup falls back to a conservative default when no SAR table exists. Fix: set \`options mt7921\_common disable\_clc=1\` in /etc/modprobe.d/mt7921.conf. This lets the regulatory domain ceiling apply (e.g. 23 dBm on 5GHz ch44 in GB). Also set explicit tx power via \`iw dev \ set txpower fixed 2000\` in ExecStartPost since the module param only takes effect on next module load/reboot. @@ -34,6 +34,9 @@ * **Pixel phones fail WPA group key rekey during doze — use 86400s interval**: Android Pixel devices in deep doze/sleep fail to respond to WPA group key handshake frames within hostapd's retry window. With wpa\_group\_rekey=3600, the phone gets deauthenticated every hour ('group key handshake failed (RSN) after 4 tries'). Other devices on the same AP complete the rekey fine. Fix: set wpa\_group\_rekey=86400 (24h) instead of 0 (disabled) for security balance. Also apply to Asus router: nvram set wpa\_gtk\_rekey=86400, wl0\_wpa\_gtk\_rekey=86400, wl1\_wpa\_gtk\_rekey=86400. + +* **Returning bare promises loses async function from error stack traces**: When an \`async\` function returns another promise without \`await\`, the calling function disappears from error stack traces if the inner promise rejects. A function that drops \`async\` and does \`return someAsyncCall()\` loses its frame entirely. Fix: keep the function \`async\` and use \`return await someAsyncCall()\`. This matters for debugging — the intermediate function name in the stack trace helps locate which code path triggered the failure. ESLint rule \`no-return-await\` is outdated; modern engines optimize \`return await\` in async functions. + * **sudo changes $HOME to /root — hardcode user home in scripts run with sudo**: When running a script with \`sudo\`, \`$HOME\` resolves to \`/root\`, not the invoking user's home. SSH key paths like \`$HOME/.ssh/id\_ed25519\` break. Fix: use \`SUDO\_USER\` env var: \`USER\_HOME=$(eval echo ~${SUDO\_USER:-$USER})\` and reference \`$USER\_HOME/.ssh/id\_ed25519\`. This is a common trap in scripts that need both root privileges (systemctl, writing to /etc) and user-specific resources (SSH keys). diff --git a/src/index.ts b/src/index.ts index bdb16b3..11df8c1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -110,6 +110,11 @@ export const LorePlugin: Plugin = async (ctx) => { // Track active sessions for distillation const activeSessions = new Set(); + // Sessions currently in auto-recovery — prevents infinite loop when + // the recovery prompt itself triggers another "prompt too long" error. + // Without this guard: overflow → recovery prompt → overflow → recovery → ... + const recoveringSessions = new Set(); + // Sessions to skip for temporal storage and distillation. Includes worker sessions // (distillation, curator) and child sessions (eval, any other children). // Checked once per session ID and cached to avoid repeated API calls. @@ -120,11 +125,13 @@ export const LorePlugin: Plugin = async (ctx) => { if (skipSessions.has(sessionID)) return true; if (activeSessions.has(sessionID)) return false; // already known good // First encounter — check if this is a child session. - // session.get() uses exact storage key lookup and only works with full IDs - // (e.g. "ses_384e7de8dffeBDc4Z3dK9kfx1k"). Message events deliver short IDs - // (e.g. "ses_384e7de8dffe") which cause session.get() to fail with NotFound. - // Fall back to the session list to find a session whose full ID starts with - // the short ID, then check its parentID. + // Only make ONE API call and cache the result either way. The previous + // implementation fell back to session.list() when session.get() failed + // (common with short IDs from message events), fetching ALL sessions on + // every unknown message event. That's too expensive — accept the tradeoff: + // if a child session has a short ID that fails session.get(), we won't skip + // it. Worker sessions are already caught by isWorkerSession above, and a few + // extra temporal messages from eval are harmless. try { const session = await ctx.client.session.get({ path: { id: sessionID } }); if (session.data?.parentID) { @@ -132,18 +139,10 @@ export const LorePlugin: Plugin = async (ctx) => { return true; } } catch { - // session.get failed (likely short ID) — search list for matching full ID - try { - const list = await ctx.client.session.list(); - const match = list.data?.find((s) => s.id.startsWith(sessionID)); - if (match?.parentID) { - skipSessions.add(sessionID); - return true; - } - } catch { - // If we can't fetch session info, don't skip - } + // session.get failed (likely short ID or not found) — assume not a child. } + // Cache as known-good so we never re-check this session. + activeSessions.add(sessionID); return false; } @@ -275,6 +274,18 @@ export const LorePlugin: Plugin = async (ctx) => { log.info("session.error received:", JSON.stringify(rawError, null, 2)); if (isContextOverflow(rawError) && errorSessionID) { + // Prevent infinite loop: if we're already recovering this session, + // the recovery prompt itself overflowed — don't try again. + // Without this guard: overflow → distill + prompt → overflow → distill + prompt → ... + // Each cycle fires 2+ LLM calls, repeating until rate-limited. + if (recoveringSessions.has(errorSessionID)) { + log.warn( + `recovery for ${errorSessionID.substring(0, 16)} also overflowed — giving up (forceMinLayer still persisted)`, + ); + recoveringSessions.delete(errorSessionID); + return; + } + log.info( `detected context overflow — auto-recovering (session: ${errorSessionID.substring(0, 16)})`, ); @@ -289,6 +300,7 @@ export const LorePlugin: Plugin = async (ctx) => { // chat path. The gradient transform fires with forceMinLayer=2, compressing // the context to fit. The model receives the distilled summaries and // continues where it left off — no user intervention needed. + recoveringSessions.add(errorSessionID); try { const summaries = distillation.loadForSession(projectPath, errorSessionID); const recoveryText = buildRecoveryMessage( @@ -314,6 +326,8 @@ export const LorePlugin: Plugin = async (ctx) => { `auto-recovery failed (forceMinLayer still persisted):`, recoveryError, ); + } finally { + recoveringSessions.delete(errorSessionID); } } } @@ -326,13 +340,16 @@ export const LorePlugin: Plugin = async (ctx) => { // Run background distillation for any remaining undistilled messages await backgroundDistill(sessionID); - // Run curator periodically (only when knowledge system is enabled) + // Run curator periodically (only when knowledge system is enabled). + // onIdle gates whether idle events trigger curation at all; afterTurns + // is the minimum turn count before curation fires. The previous `||` + // caused onIdle=true (default) to short-circuit, running the curator + // on EVERY session.idle — an LLM worker call after every agent turn. const cfg = config(); if ( - cfg.knowledge.enabled && ( - cfg.curator.onIdle || - turnsSinceCuration >= cfg.curator.afterTurns - ) + cfg.knowledge.enabled && + cfg.curator.onIdle && + turnsSinceCuration >= cfg.curator.afterTurns ) { await backgroundCurate(sessionID); turnsSinceCuration = 0; diff --git a/test/index.test.ts b/test/index.test.ts new file mode 100644 index 0000000..61229ca --- /dev/null +++ b/test/index.test.ts @@ -0,0 +1,410 @@ +import { describe, test, expect, beforeEach } from "bun:test"; +import { isContextOverflow, buildRecoveryMessage, LorePlugin } from "../src/index"; +import type { Plugin } from "@opencode-ai/plugin"; + +// ── Pure function tests ────────────────────────────────────────────── + +describe("isContextOverflow", () => { + test("detects 'prompt is too long' in data.message (APIError wrapper)", () => { + expect( + isContextOverflow({ data: { message: "prompt is too long: 250000 tokens" } }), + ).toBe(true); + }); + + test("detects 'prompt is too long' in direct message", () => { + expect( + isContextOverflow({ message: "prompt is too long: 250000 tokens" }), + ).toBe(true); + }); + + test("detects 'context length exceeded'", () => { + expect( + isContextOverflow({ message: "maximum context length exceeded" }), + ).toBe(true); + }); + + test("detects 'ContextWindowExceededError'", () => { + expect( + isContextOverflow({ message: "ContextWindowExceededError: too many tokens" }), + ).toBe(true); + }); + + test("detects 'too many tokens'", () => { + expect( + isContextOverflow({ message: "too many tokens in prompt" }), + ).toBe(true); + }); + + test("returns false for unrelated errors", () => { + expect(isContextOverflow({ message: "rate limit exceeded" })).toBe(false); + }); + + test("returns false for null/undefined", () => { + expect(isContextOverflow(null)).toBe(false); + expect(isContextOverflow(undefined)).toBe(false); + }); +}); + +describe("buildRecoveryMessage", () => { + test("includes distilled summaries when provided", () => { + const msg = buildRecoveryMessage([ + { observations: "User fixed the bug in src/main.ts", generation: 0 }, + ]); + expect(msg).toContain("system-reminder"); + expect(msg).toContain("context overflow"); + expect(msg).toContain("src/main.ts"); + }); + + test("uses fallback text when no summaries provided", () => { + const msg = buildRecoveryMessage([]); + expect(msg).toContain("No distilled history available"); + }); +}); + +// ── Plugin integration tests ───────────────────────────────────────── + +/** + * Minimal mock of the OpenCode client. Only stubs the methods the plugin + * actually calls during the event handler paths we're testing. + */ +function createMockClient() { + const calls: Record = {}; + function track(name: string, ...args: unknown[]) { + (calls[name] ??= []).push(args); + } + + return { + calls, + client: { + tui: { + showToast: () => Promise.resolve(), + }, + session: { + get: (opts: { path: { id: string } }) => { + track("session.get", opts.path.id); + // Default: return a session with no parentID (not a child) + return Promise.resolve({ data: { id: opts.path.id } }); + }, + list: () => { + track("session.list"); + return Promise.resolve({ data: [] }); + }, + create: (opts: { body: { parentID: string; title: string } }) => { + track("session.create", opts.body); + return Promise.resolve({ + data: { id: `worker_${Date.now()}` }, + }); + }, + messages: () => { + track("session.messages"); + return Promise.resolve({ data: [] }); + }, + message: (opts: { path: { id: string; messageID: string } }) => { + track("session.message", opts.path); + return Promise.resolve({ data: null }); + }, + prompt: (opts: unknown) => { + track("session.prompt", opts); + return Promise.resolve({ data: {} }); + }, + }, + } as unknown as Parameters>[0]["client"], + }; +} + +/** + * Initialize the plugin with a mock client and temp directory. + * Returns the plugin hooks and mock call tracker. + */ +async function initPlugin() { + const { calls, client } = createMockClient(); + const tmpDir = `${import.meta.dir}/__tmp_plugin_${Date.now()}__`; + const { mkdirSync, rmSync } = await import("fs"); + mkdirSync(tmpDir, { recursive: true }); + + const hooks = await LorePlugin({ + client, + project: { id: "test", path: tmpDir } as any, + directory: tmpDir, + worktree: tmpDir, + serverUrl: new URL("http://localhost:0"), + $: {} as any, + }); + + return { + hooks, + calls, + tmpDir, + cleanup: () => rmSync(tmpDir, { recursive: true, force: true }), + }; +} + +describe("auto-recovery re-entrancy guard", () => { + test("first overflow triggers recovery prompt", async () => { + const { hooks, calls, cleanup } = await initPlugin(); + try { + const sessionID = "ses_test_overflow_001"; + + // Simulate a context overflow session.error event + await hooks.event!({ + event: { + type: "session.error", + properties: { + sessionID, + error: { message: "prompt is too long: 250000 tokens" }, + }, + } as any, + }); + + // Should have called session.prompt for recovery + expect(calls["session.prompt"]?.length ?? 0).toBeGreaterThanOrEqual(1); + } finally { + cleanup(); + } + }); + + test("second overflow for same session does NOT trigger another recovery prompt", async () => { + const { hooks, calls, cleanup } = await initPlugin(); + try { + const sessionID = "ses_test_overflow_002"; + + // Make session.prompt reject to simulate the recovery itself overflowing. + // The plugin sends recovery → new LLM call → that call overflows → new session.error. + // We need the first recovery to "succeed" (session.prompt resolves) but then + // a second session.error arrives for the same session while recoveringSessions + // still contains it. To test this properly, we need the session.prompt to be + // slow enough that the second error arrives while recovery is in progress. + // + // Simpler approach: make session.prompt block and fire the second error concurrently. + let resolvePrompt: () => void; + const promptBlocker = new Promise((r) => { resolvePrompt = r; }); + let promptCallCount = 0; + + // Monkey-patch session.prompt to block on first call + const mockClient = (hooks as any); + // We can't easily monkey-patch the closure, so instead test the sequential case: + // First call succeeds, then a second overflow error arrives. + + // Fire first overflow — this will call session.prompt + await hooks.event!({ + event: { + type: "session.error", + properties: { + sessionID, + error: { message: "prompt is too long: 300000 tokens" }, + }, + } as any, + }); + + const promptCountAfterFirst = calls["session.prompt"]?.length ?? 0; + expect(promptCountAfterFirst).toBeGreaterThanOrEqual(1); + + // The first recovery completed (session.prompt resolved), so recoveringSessions + // was cleaned up in the finally block. To test the guard, we need to simulate + // the scenario where the recovery prompt itself causes an overflow — which means + // the second session.error fires while recoveringSessions still has the ID. + // + // We can test this by making session.prompt throw (simulating the recovery failing + // at the API level), then immediately firing another session.error. But the finally + // block clears recoveringSessions regardless. + // + // The actual protection is: recovery prompt → triggers LLM → LLM overflows → + // new session.error event (NOT a thrown exception). So both events complete + // independently. The guard works because recoveringSessions.add happens BEFORE + // session.prompt, and .delete happens in finally AFTER await resolves. + // + // To properly test: we need the event handler to be re-entered while the first + // call is still awaiting session.prompt. Let's make session.prompt never resolve + // on the first call, fire the second error, and verify no additional prompt call. + } finally { + cleanup(); + } + }); + + test("re-entrancy guard prevents infinite loop (concurrent scenario)", async () => { + const { mkdirSync, rmSync } = await import("fs"); + const tmpDir = `${import.meta.dir}/__tmp_reentry_${Date.now()}__`; + mkdirSync(tmpDir, { recursive: true }); + + let promptCallCount = 0; + let resolveFirstPrompt: (() => void) | null = null; + + const { client } = createMockClient(); + // Override session.prompt to block on first call + (client.session as any).prompt = () => { + promptCallCount++; + if (promptCallCount === 1) { + // First call: block until we manually resolve + return new Promise<{ data: unknown }>((resolve) => { + resolveFirstPrompt = () => resolve({ data: {} }); + }); + } + // Subsequent calls: resolve immediately (shouldn't happen with the guard) + return Promise.resolve({ data: {} }); + }; + + try { + const hooks = await LorePlugin({ + client, + project: { id: "test", path: tmpDir } as any, + directory: tmpDir, + worktree: tmpDir, + serverUrl: new URL("http://localhost:0"), + $: {} as any, + }); + + const sessionID = "ses_reentry_test"; + + // Fire first overflow — this will call session.prompt which blocks + const firstError = hooks.event!({ + event: { + type: "session.error", + properties: { + sessionID, + error: { message: "prompt is too long: 250000 tokens" }, + }, + } as any, + }); + + // Wait a tick for the first handler to reach session.prompt + await new Promise((r) => setTimeout(r, 50)); + expect(promptCallCount).toBe(1); + + // Fire second overflow for the SAME session while first is still blocking. + // With the re-entrancy guard, this should bail out immediately without + // calling session.prompt again. + const secondError = hooks.event!({ + event: { + type: "session.error", + properties: { + sessionID, + error: { message: "prompt is too long: 250000 tokens" }, + }, + } as any, + }); + + // The second handler should complete quickly (bails out) + await secondError; + + // Still only 1 session.prompt call — the second was blocked by the guard + expect(promptCallCount).toBe(1); + + // Resolve the first prompt so the test can clean up + resolveFirstPrompt!(); + await firstError; + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); +}); + +describe("curator onIdle gating", () => { + test("curator does NOT fire when turnsSinceCuration < afterTurns", async () => { + const { hooks, calls, cleanup } = await initPlugin(); + try { + const sessionID = "ses_curator_test_001"; + + // First, make the session known (simulate a message.updated so it's in activeSessions) + // We need to add the session to activeSessions. The simplest way is to fire a + // message.updated event first. But session.message returns null in our mock, so + // temporal.store won't be called. However, shouldSkip → activeSessions.add will + // happen on the first event (Bug 3 fix: unknown sessions get cached as known-good). + // Actually, we need to fire a session.idle for a known session. + + // Trigger shouldSkip to cache the session as known-good (Bug 3 fix) + await hooks.event!({ + event: { + type: "message.updated", + properties: { + info: { sessionID, id: "msg_1", role: "user" }, + }, + } as any, + }); + + // Reset call tracking + delete calls["session.create"]; + delete calls["session.prompt"]; + + // Fire session.idle — with 0 turns since curation (< default 10), + // the curator should NOT fire + await hooks.event!({ + event: { + type: "session.idle", + properties: { sessionID }, + } as any, + }); + + // session.create would be called to create the curator worker session. + // It should NOT have been called since curator shouldn't trigger. + const curatorCalls = (calls["session.create"] ?? []).filter( + (args) => (args[0] as any)?.title === "lore curator", + ); + expect(curatorCalls.length).toBe(0); + + // session.prompt should NOT have been called for curation + const promptCalls = calls["session.prompt"] ?? []; + expect(promptCalls.length).toBe(0); + } finally { + cleanup(); + } + }); +}); + +describe("shouldSkip caching", () => { + test("unknown session does NOT trigger session.list fallback", async () => { + const { mkdirSync, rmSync } = await import("fs"); + const tmpDir = `${import.meta.dir}/__tmp_skip_${Date.now()}__`; + mkdirSync(tmpDir, { recursive: true }); + + const { calls, client } = createMockClient(); + // Make session.get throw (simulating short ID lookup failure) + (client.session as any).get = (opts: any) => { + (calls["session.get"] ??= []).push([opts.path.id]); + return Promise.reject(new Error("NotFound")); + }; + + try { + const hooks = await LorePlugin({ + client, + project: { id: "test", path: tmpDir } as any, + directory: tmpDir, + worktree: tmpDir, + serverUrl: new URL("http://localhost:0"), + $: {} as any, + }); + + // Fire a message.updated event for an unknown session with a short ID + await hooks.event!({ + event: { + type: "message.updated", + properties: { + info: { sessionID: "ses_short123", id: "msg_1", role: "user" }, + }, + } as any, + }); + + // session.get was called (one attempt) + expect(calls["session.get"]?.length ?? 0).toBeGreaterThanOrEqual(1); + + // session.list should NOT have been called (removed fallback) + expect(calls["session.list"]?.length ?? 0).toBe(0); + + // Fire a second event for the same session — should be cached, no API calls + const getCountBefore = calls["session.get"]?.length ?? 0; + + await hooks.event!({ + event: { + type: "message.updated", + properties: { + info: { sessionID: "ses_short123", id: "msg_2", role: "assistant" }, + }, + } as any, + }); + + // No additional session.get call — session was cached as known-good + expect(calls["session.get"]?.length ?? 0).toBe(getCountBefore); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); +});