diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ece166b..c39022e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,25 +3,25 @@ name: CI on: push: branches: [main, dev] + # Run on every PR regardless of base branch. The `branches` filter on + # pull_request only matches base, so stacked / long-lived branches + # (e.g. `optimizations`) would otherwise skip the whole CI job. pull_request: - branches: [main, dev] permissions: contents: read pull-requests: write jobs: - test: - name: Typecheck and Test + duplication: + # Code-duplication regression guard. Pulled out of the `test` job so + # the PR checks table shows a dedicated pass/fail row — reviewers see + # at a glance whether the change introduced duplicated code without + # having to open the combined "Typecheck and Test" log. + name: Duplication check runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 - with: - # Full history so the "Build PR coverage comment" step can do - # `git diff origin/...HEAD` to detect touched src/ files. - # Default shallow checkout (depth=1) produces "no merge base". - fetch-depth: 0 - name: Setup Node.js uses: actions/setup-node@v4 @@ -31,10 +31,7 @@ jobs: - name: Install dependencies run: npm install - - name: Typecheck - run: npm run typecheck - - - name: Duplication check (jscpd) + - name: Run jscpd # Threshold 7% is the current baseline (see .jscpd.json). The job # fails if a future change pushes duplication above it, so the # number is a regression guard — reviewers can see the exact @@ -49,6 +46,29 @@ jobs: path: jscpd-report/ if-no-files-found: ignore + test: + name: Typecheck and Test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + # Full history so the "Build PR coverage comment" step can do + # `git diff origin/...HEAD` to detect touched src/ files. + # Default shallow checkout (depth=1) produces "no merge base". 
+ fetch-depth: 0 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install dependencies + run: npm install + + - name: Typecheck + run: npm run typecheck + - name: Run tests with coverage # Per-file 80% thresholds for PR #60 files are declared in # vitest.config.ts under `coverage.thresholds`. Vitest exits non-zero diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 3b5a215..50551da 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -55,6 +55,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -79,27 +82,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? 
process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await 
sleep(delay); @@ -252,8 +284,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join3(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync2(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). 
*/ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -264,22 +349,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? []).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. 
*/ async ensureTable(name) { @@ -289,6 +377,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ @@ -298,7 +388,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -309,37 +402,37 @@ function buildSessionPath(config, sessionId) { } // dist/src/hooks/summary-state.js -import { readFileSync as readFileSync2, writeFileSync, writeSync, mkdirSync, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; +import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync2, 
renameSync, existsSync as existsSync3, unlinkSync, openSync, closeSync } from "node:fs"; import { homedir as homedir3 } from "node:os"; -import { join as join3 } from "node:path"; +import { join as join4 } from "node:path"; var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join3(homedir3(), ".claude", "hooks", "summary-state"); +var STATE_DIR = join4(homedir3(), ".claude", "hooks", "summary-state"); var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); function statePath(sessionId) { - return join3(STATE_DIR, `${sessionId}.json`); + return join4(STATE_DIR, `${sessionId}.json`); } function lockPath(sessionId) { - return join3(STATE_DIR, `${sessionId}.lock`); + return join4(STATE_DIR, `${sessionId}.lock`); } function readState(sessionId) { const p = statePath(sessionId); - if (!existsSync2(p)) + if (!existsSync3(p)) return null; try { - return JSON.parse(readFileSync2(p, "utf-8")); + return JSON.parse(readFileSync3(p, "utf-8")); } catch { return null; } } function writeState(sessionId, state) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const p = statePath(sessionId); const tmp = `${p}.${process.pid}.${Date.now()}.tmp`; - writeFileSync(tmp, JSON.stringify(state)); + writeFileSync2(tmp, JSON.stringify(state)); renameSync(tmp, p); } function withRmwLock(sessionId, fn) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const rmwLock = statePath(sessionId) + ".rmw"; const deadline = Date.now() + 2e3; let fd = null; @@ -401,11 +494,11 @@ function shouldTrigger(state, cfg, now = Date.now()) { return false; } function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const p = lockPath(sessionId); - if (existsSync2(p)) { + if (existsSync3(p)) { try { - const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10); + const ageMs = Date.now() - parseInt(readFileSync3(p, 
"utf-8"), 10); if (Number.isFinite(ageMs) && ageMs < maxAgeMs) return false; } catch (readErr) { @@ -445,20 +538,20 @@ function releaseLock(sessionId) { // dist/src/hooks/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join5 } from "node:path"; -import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs"; -import { homedir as homedir4, tmpdir } from "node:os"; +import { dirname, join as join6 } from "node:path"; +import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync4 } from "node:fs"; +import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; // dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join4 } from "node:path"; +import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join5 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join4(hooksDir, filename); + const path = join5(hooksDir, filename); return { path, log(msg) { try { - mkdirSync2(hooksDir, { recursive: true }); + mkdirSync3(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -469,7 +562,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/spawn-wiki-worker.js var HOME = homedir4(); -var wikiLogger = makeWikiLogger(join5(HOME, ".claude", "hooks")); +var wikiLogger = makeWikiLogger(join6(HOME, ".claude", "hooks")); var WIKI_LOG = wikiLogger.path; var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. 
@@ -528,16 +621,16 @@ function findClaudeBin() { try { return execSync("which claude 2>/dev/null", { encoding: "utf-8" }).trim(); } catch { - return join5(HOME, ".claude", "local", "claude"); + return join6(HOME, ".claude", "local", "claude"); } } function spawnWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join5(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync3(tmpDir, { recursive: true }); - const configFile = join5(tmpDir, "config.json"); - writeFileSync2(configFile, JSON.stringify({ + const tmpDir = join6(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync4(tmpDir, { recursive: true }); + const configFile = join6(tmpDir, "config.json"); + writeFileSync3(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, @@ -550,11 +643,11 @@ function spawnWikiWorker(opts) { tmpDir, claudeBin: findClaudeBin(), wikiLog: WIKI_LOG, - hooksDir: join5(HOME, ".claude", "hooks"), + hooksDir: join6(HOME, ".claude", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join5(bundleDir, "wiki-worker.js"); + const workerPath = join6(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index 6d4cb13..064f11e 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -239,6 +239,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { tmpdir } from "node:os"; // 
dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -260,27 +263,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -433,8 +465,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join4(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync2(getIndexMarkerDir(), { recursive: true }); + writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -445,22 +530,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -470,6 +558,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -479,7 +569,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index cb59c9c..5076674 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -1,21 +1,20 @@ #!/usr/bin/env node // dist/src/hooks/pre-tool-use.js -import { existsSync as existsSync2 } from "node:fs"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; -import { fileURLToPath } from "node:url"; -import { dirname } from "node:path"; +import { existsSync as existsSync3, mkdirSync as mkdirSync3, writeFileSync as writeFileSync3 } from "node:fs"; +import { homedir as homedir5 } from "node:os"; +import { join as join6, dirname, sep } from "node:path"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -62,6 +61,9 @@ 
function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -86,26 +88,47 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); +} +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -138,6 +161,7 @@ var DeeplakeApi = class { tableName; _pendingRows = []; _sem = new Semaphore(MAX_CONCURRENCY); + _tablesCache = null; constructor(token, apiUrl, orgId, workspaceId, tableName) { this.token = token; this.apiUrl = apiUrl; @@ -168,6 +192,7 @@ var DeeplakeApi = class { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { let resp; try { + const signal = AbortSignal.timeout(QUERY_TIMEOUT_MS); resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { method: "POST", headers: { @@ -175,9 +200,14 @@ var DeeplakeApi = class { "Content-Type": "application/json", "X-Activeloop-Org-Id": this.orgId }, + signal, body: JSON.stringify({ query: sql }) }); } catch (e) { + if (isTimeoutError(e)) { + lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + throw lastError; + } lastError = e instanceof Error ? 
e : new Error(String(e)); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; @@ -194,7 +224,8 @@ var DeeplakeApi = class { return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -259,8 +290,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join3(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync2(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -271,22 +355,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -296,6 +383,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -305,10 +394,27 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + // dist/src/shell/grep-core.js var TOOL_INPUT_FIELDS = [ "command", @@ -518,29 +624,127 @@ function normalizeContent(path, raw) { return raw; return out; } +function buildPathCondition(targetPath) { + if (!targetPath || targetPath === "/") + return ""; + const clean = targetPath.replace(/\/+$/, ""); + if (/[*?]/.test(clean)) { + const likePattern = sqlLike(clean).replace(/\*/g, "%").replace(/\?/g, "_"); + return `path LIKE '${likePattern}' ESCAPE '\\'`; + } + const base = clean.split("/").pop() ?? 
""; + if (base.includes(".")) { + return `path = '${sqlStr(clean)}'`; + } + return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; +} async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; const limit = opts.limit ?? 100; - const memFilter = contentScanOnly ? "" : ` AND summary::text ${likeOp} '%${escapedPattern}%'`; - const sessFilter = contentScanOnly ? "" : ` AND message::text ${likeOp} '%${escapedPattern}%'`; - const memQuery = `SELECT path, summary::text AS content FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const [memRows, sessRows] = await Promise.all([ - api.query(memQuery).catch(() => []), - api.query(sessQuery).catch(() => []) - ]); - const rows = []; - for (const r of memRows) - rows.push({ path: String(r.path), content: String(r.content ?? "") }); - for (const r of sessRows) - rows.push({ path: String(r.path), content: String(r.content ?? "") }); - return rows; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; + const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + return rows.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); } function buildPathFilter(targetPath) { - if (!targetPath || targetPath === "/") + const condition = buildPathCondition(targetPath); + return condition ? ` AND ${condition}` : ""; +} +function extractRegexLiteralPrefilter(pattern) { + if (!pattern) + return null; + const parts = []; + let current = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return null; + if (/[dDsSwWbBAZzGkKpP]/.test(next)) + return null; + current += next; + i++; + continue; + } + if (ch === ".") { + if (pattern[i + 1] === "*") { + if (current) + parts.push(current); + current = ""; + i++; + continue; + } + return null; + } + if ("|()[]{}+?^$".includes(ch) || ch === "*") + return null; + current += ch; + } + if (current) + parts.push(current); + const literal = parts.reduce((best, part) => part.length > best.length ? part : best, ""); + return literal.length >= 2 ? 
literal : null; +} +function extractRegexAlternationPrefilters(pattern) { + if (!pattern.includes("|")) + return null; + const parts = []; + let current = ""; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + current += `\\${ch}`; + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "|") { + if (!current) + return null; + parts.push(current); + current = ""; + continue; + } + if ("()[]{}^$".includes(ch)) + return null; + current += ch; + } + if (escaped || !current) + return null; + parts.push(current); + const literals = [...new Set(parts.map((part) => extractRegexLiteralPrefilter(part)).filter((part) => typeof part === "string" && part.length >= 2))]; + return literals.length > 0 ? literals : null; +} +function buildGrepSearchOptions(params, targetPath) { + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + return { + pathFilter: buildPathFilter(targetPath), + contentScanOnly: hasRegexMeta, + likeOp: params.ignoreCase ? "ILIKE" : "LIKE", + escapedPattern: sqlLike(params.pattern), + prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + }; +} +function buildContentFilter(column, likeOp, patterns) { + if (patterns.length === 0) return ""; - const clean = targetPath.replace(/\/+$/, ""); - return ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; + if (patterns.length === 1) + return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; + return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; } function compileGrepRegex(params) { let reStr = params.fixedString ? 
params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; @@ -584,81 +788,205 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, { - pathFilter: buildPathFilter(targetPath), - contentScanOnly: hasRegexMeta, - likeOp: params.ignoreCase ? "ILIKE" : "LIKE", - escapedPattern: sqlLike(params.pattern) - }); + const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); return refineGrepMatches(normalized, params); } +// dist/src/utils/output-cap.js +var CLAUDE_OUTPUT_CAP_BYTES = 8 * 1024; +function byteLen(str) { + return Buffer.byteLength(str, "utf8"); +} +function capOutputForClaude(output, options = {}) { + const maxBytes = options.maxBytes ?? CLAUDE_OUTPUT_CAP_BYTES; + if (byteLen(output) <= maxBytes) + return output; + const kind = options.kind ?? "output"; + const footerReserve = 220; + const budget = Math.max(1, maxBytes - footerReserve); + let running = 0; + const lines = output.split("\n"); + const keptLines = []; + for (const line of lines) { + const lineBytes = byteLen(line) + 1; + if (running + lineBytes > budget) + break; + keptLines.push(line); + running += lineBytes; + } + if (keptLines.length === 0) { + const buf = Buffer.from(output, "utf8"); + let cutByte = Math.min(budget, buf.length); + while (cutByte > 0 && (buf[cutByte] & 192) === 128) + cutByte--; + const slice = buf.subarray(0, cutByte).toString("utf8"); + const footer2 = ` +... 
[${kind} truncated: ${(byteLen(output) / 1024).toFixed(1)} KB total; refine with '| head -N' or a tighter pattern]`; + return slice + footer2; + } + const totalLines = lines.length - (lines[lines.length - 1] === "" ? 1 : 0); + const elidedLines = Math.max(0, totalLines - keptLines.length); + const elidedBytes = byteLen(output) - byteLen(keptLines.join("\n")); + const footer = ` +... [${kind} truncated: ${elidedLines} more lines (${(elidedBytes / 1024).toFixed(1)} KB) elided \u2014 refine with '| head -N' or a tighter pattern]`; + return keptLines.join("\n") + footer; +} + // dist/src/hooks/grep-direct.js -function parseBashGrep(cmd) { - const first = cmd.trim().split(/\s*\|\s*/)[0]; - if (!/^(grep|egrep|fgrep)\b/.test(first)) - return null; - const isFixed = first.startsWith("fgrep"); +function splitFirstPipelineStage(cmd) { + const input = cmd.trim(); + let quote = null; + let escaped = false; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (escaped) { + escaped = false; + continue; + } + if (quote) { + if (ch === quote) { + quote = null; + continue; + } + if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + continue; + } + if (ch === "|") + return input.slice(0, i).trim(); + } + return quote ? 
null : input; +} +function tokenizeGrepStage(input) { const tokens = []; - let pos = 0; - while (pos < first.length) { - if (first[pos] === " " || first[pos] === " ") { - pos++; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"' && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } continue; } - if (first[pos] === "'" || first[pos] === '"') { - const q = first[pos]; - let end = pos + 1; - while (end < first.length && first[end] !== q) - end++; - tokens.push(first.slice(pos + 1, end)); - pos = end + 1; - } else { - let end = pos; - while (end < first.length && first[end] !== " " && first[end] !== " ") - end++; - tokens.push(first.slice(pos, end)); - pos = end; + if (ch === "'" || ch === '"') { + quote = ch; + continue; } + if (ch === "\\" && i + 1 < input.length) { + current += input[++i]; + continue; + } + if (/\s/.test(ch)) { + if (current) { + tokens.push(current); + current = ""; + } + continue; + } + current += ch; } + if (quote) + return null; + if (current) + tokens.push(current); + return tokens; +} +function parseBashGrep(cmd) { + const first = splitFirstPipelineStage(cmd); + if (!first) + return null; + if (!/^(grep|egrep|fgrep)\b/.test(first)) + return null; + const isFixed = first.startsWith("fgrep"); + const tokens = tokenizeGrepStage(first); + if (!tokens || tokens.length === 0) + return null; let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + const explicitPatterns = []; let ti = 1; - while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { - const flag = tokens[ti]; - if (flag.startsWith("--")) { + while (ti < tokens.length) { + const token = tokens[ti]; + if (token === "--") { + ti++; + break; + } + if (!token.startsWith("-") || token === "-") + break; + 
if (token.startsWith("--")) { + const [flag, inlineValue] = token.split("=", 2); const handlers = { "--ignore-case": () => { ignoreCase = true; + return false; }, "--word-regexp": () => { wordMatch = true; + return false; }, "--files-with-matches": () => { filesOnly = true; + return false; }, "--count": () => { countOnly = true; + return false; }, "--line-number": () => { lineNumber = true; + return false; }, "--invert-match": () => { invertMatch = true; + return false; }, "--fixed-strings": () => { fixedString = true; + return false; + }, + "--after-context": () => inlineValue === void 0, + "--before-context": () => inlineValue === void 0, + "--context": () => inlineValue === void 0, + "--max-count": () => inlineValue === void 0, + "--regexp": () => { + if (inlineValue !== void 0) { + explicitPatterns.push(inlineValue); + return false; + } + return true; } }; - handlers[flag]?.(); + const consumeNext = handlers[flag]?.() ?? false; + if (consumeNext) { + ti++; + if (ti >= tokens.length) + return null; + if (flag === "--regexp") + explicitPatterns.push(tokens[ti]); + } ti++; continue; } - for (const c of flag.slice(1)) { - switch (c) { + const shortFlags = token.slice(1); + for (let i = 0; i < shortFlags.length; i++) { + const flag = shortFlags[i]; + switch (flag) { case "i": ignoreCase = true; break; @@ -680,19 +1008,48 @@ function parseBashGrep(cmd) { case "F": fixedString = true; break; + case "r": + case "R": + case "E": + break; + case "A": + case "B": + case "C": + case "m": + if (i === shortFlags.length - 1) { + ti++; + if (ti >= tokens.length) + return null; + } + i = shortFlags.length; + break; + case "e": { + const inlineValue = shortFlags.slice(i + 1); + if (inlineValue) { + explicitPatterns.push(inlineValue); + } else { + ti++; + if (ti >= tokens.length) + return null; + explicitPatterns.push(tokens[ti]); + } + i = shortFlags.length; + break; + } + default: + break; } } ti++; } - if (ti < tokens.length && tokens[ti] === "--") - ti++; - if (ti >= 
tokens.length) + const pattern = explicitPatterns.length > 0 ? explicitPatterns[0] : tokens[ti]; + if (!pattern) return null; - let target = tokens[ti + 1] ?? "/"; + let target = explicitPatterns.length > 0 ? tokens[ti] ?? "/" : tokens[ti + 1] ?? "/"; if (target === "." || target === "./") target = "/"; return { - pattern: tokens[ti], + pattern, targetPath: target, ignoreCase, wordMatch, @@ -717,18 +1074,665 @@ async function handleGrepDirect(api, table, sessionsTable, params) { fixedString: params.fixedString }; const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); - return output.join("\n") || "(no matches)"; + const joined = output.join("\n") || "(no matches)"; + return capOutputForClaude(joined, { kind: "grep" }); } -// dist/src/hooks/pre-tool-use.js -var log3 = (msg) => log("pre", msg); -var MEMORY_PATH = join3(homedir3(), ".deeplake", "memory"); +// dist/src/hooks/virtual-table-query.js +function normalizeSessionPart(path, content) { + return normalizeContent(path, content); +} +function buildVirtualIndexContent(summaryRows, sessionRows = []) { + const total = summaryRows.length + sessionRows.length; + const lines = [ + "# Memory Index", + "", + `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`, + "" + ]; + if (summaryRows.length > 0) { + lines.push("## Summaries", ""); + for (const row of summaryRows) { + const path = row["path"]; + const project = row["project"] || ""; + const description = (row["description"] || "").slice(0, 120); + const date = (row["creation_date"] || "").slice(0, 10); + lines.push(`- [${path}](${path}) ${date} ${project ? 
`[${project}]` : ""} ${description}`); + } + lines.push(""); + } + if (sessionRows.length > 0) { + lines.push("## Sessions", ""); + for (const row of sessionRows) { + const path = row["path"]; + const description = (row["description"] || "").slice(0, 120); + lines.push(`- [${path}](${path}) ${description}`); + } + } + return lines.join("\n"); +} +function buildUnionQuery(memoryQuery, sessionsQuery) { + return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; +} +function buildInList(paths) { + return paths.map((path) => `'${sqlStr(path)}'`).join(", "); +} +function buildDirFilter(dirs) { + const cleaned = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; + if (cleaned.length === 0 || cleaned.includes("/")) + return ""; + const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`); + return ` WHERE ${clauses.join(" OR ")}`; +} +async function queryUnionRows(api, memoryQuery, sessionsQuery) { + const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); + try { + return await api.query(unionQuery); + } catch { + const [memoryRows, sessionRows] = await Promise.all([ + api.query(memoryQuery).catch(() => []), + api.query(sessionsQuery).catch(() => []) + ]); + return [...memoryRows, ...sessionRows]; + } +} +async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualPaths) { + const uniquePaths = [...new Set(virtualPaths)]; + const result = new Map(uniquePaths.map((path) => [path, null])); + if (uniquePaths.length === 0) + return result; + const inList = buildInList(uniquePaths); + const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 
1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); + const memoryHits = /* @__PURE__ */ new Map(); + const sessionHits = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = row["path"]; + const content = row["content"]; + const sourceOrder = Number(row["source_order"] ?? 0); + if (typeof path !== "string" || typeof content !== "string") + continue; + if (sourceOrder === 0) { + memoryHits.set(path, content); + } else { + const current = sessionHits.get(path) ?? []; + current.push(normalizeSessionPart(path, content)); + sessionHits.set(path, current); + } + } + for (const path of uniquePaths) { + if (memoryHits.has(path)) { + result.set(path, memoryHits.get(path) ?? null); + continue; + } + const sessionParts = sessionHits.get(path) ?? []; + if (sessionParts.length > 0) { + result.set(path, sessionParts.join("\n")); + } + } + if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const [summaryRows, sessionRows] = await Promise.all([ + api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []), + api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => []) + ]); + result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + } + return result; +} +async function listVirtualPathRowsForDirs(api, memoryTable, sessionsTable, dirs) { + const uniqueDirs = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; + const filter = buildDirFilter(uniqueDirs); + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}"${filter}`, `SELECT path, NULL::text AS content, size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}"${filter}`); + const deduped = dedupeRowsByPath(rows.map((row) => ({ + path: row["path"], + size_bytes: 
row["size_bytes"] + }))); + const byDir = /* @__PURE__ */ new Map(); + for (const dir of uniqueDirs) + byDir.set(dir, []); + for (const row of deduped) { + const path = row["path"]; + if (typeof path !== "string") + continue; + for (const dir of uniqueDirs) { + const prefix = dir === "/" ? "/" : `${dir}/`; + if (dir === "/" || path.startsWith(prefix)) { + byDir.get(dir)?.push(row); + } + } + } + return byDir; +} +async function readVirtualPathContent(api, memoryTable, sessionsTable, virtualPath) { + return (await readVirtualPathContents(api, memoryTable, sessionsTable, [virtualPath])).get(virtualPath) ?? null; +} +async function listVirtualPathRows(api, memoryTable, sessionsTable, dir) { + return (await listVirtualPathRowsForDirs(api, memoryTable, sessionsTable, [dir])).get(dir.replace(/\/+$/, "") || "/") ?? []; +} +async function findVirtualPaths(api, memoryTable, sessionsTable, dir, filenamePattern) { + const normalizedDir = dir.replace(/\/+$/, "") || "/"; + const likePath = `${sqlLike(normalizedDir === "/" ? "" : normalizedDir)}/%`; + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`); + return [...new Set(rows.map((row) => row["path"]).filter((value) => typeof value === "string" && value.length > 0))]; +} +function dedupeRowsByPath(rows) { + const seen = /* @__PURE__ */ new Set(); + const unique = []; + for (const row of rows) { + const path = typeof row["path"] === "string" ? 
row["path"] : ""; + if (!path || seen.has(path)) + continue; + seen.add(path); + unique.push(row); + } + return unique; +} + +// dist/src/hooks/bash-command-compiler.js +function isQuoted(ch) { + return ch === "'" || ch === '"'; +} +function splitTopLevel(input, operators) { + const parts = []; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) + quote = null; + current += ch; + continue; + } + if (isQuoted(ch)) { + quote = ch; + current += ch; + continue; + } + const matched = operators.find((op) => input.startsWith(op, i)); + if (matched) { + const trimmed2 = current.trim(); + if (trimmed2) + parts.push(trimmed2); + current = ""; + i += matched.length - 1; + continue; + } + current += ch; + } + if (quote) + return null; + const trimmed = current.trim(); + if (trimmed) + parts.push(trimmed); + return parts; +} +function tokenizeShellWords(input) { + const tokens = []; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"' && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } + continue; + } + if (isQuoted(ch)) { + quote = ch; + continue; + } + if (/\s/.test(ch)) { + if (current) { + tokens.push(current); + current = ""; + } + continue; + } + current += ch; + } + if (quote) + return null; + if (current) + tokens.push(current); + return tokens; +} +function expandBraceToken(token) { + const match = token.match(/\{([^{}]+)\}/); + if (!match) + return [token]; + const [expr] = match; + const prefix = token.slice(0, match.index); + const suffix = token.slice((match.index ?? 0) + expr.length); + let variants = []; + const numericRange = match[1].match(/^(-?\d+)\.\.(-?\d+)$/); + if (numericRange) { + const start = Number(numericRange[1]); + const end = Number(numericRange[2]); + const step = start <= end ? 
1 : -1; + for (let value = start; step > 0 ? value <= end : value >= end; value += step) { + variants.push(String(value)); + } + } else { + variants = match[1].split(","); + } + return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); +} +function stripAllowedModifiers(segment) { + const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + return { clean, ignoreMissing }; +} +function hasUnsupportedRedirection(segment) { + let quote = null; + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]; + if (quote) { + if (ch === quote) + quote = null; + continue; + } + if (isQuoted(ch)) { + quote = ch; + continue; + } + if (ch === ">" || ch === "<") + return true; + } + return false; +} +function parseHeadTailStage(stage) { + const tokens = tokenizeShellWords(stage); + if (!tokens || tokens.length === 0) + return null; + const [cmd, ...rest] = tokens; + if (cmd !== "head" && cmd !== "tail") + return null; + if (rest.length === 0) + return { lineLimit: 10, fromEnd: cmd === "tail" }; + if (rest.length === 1) { + const count = Number(rest[0]); + if (!Number.isFinite(count)) { + return { lineLimit: 10, fromEnd: cmd === "tail" }; + } + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && /^-\d+$/.test(rest[0])) { + const count = Number(rest[0]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 3 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + return null; +} +function 
isValidPipelineHeadTailStage(stage) { + const tokens = tokenizeShellWords(stage); + if (!tokens || tokens[0] !== "head" && tokens[0] !== "tail") + return false; + if (tokens.length === 1) + return true; + if (tokens.length === 2) + return /^-\d+$/.test(tokens[1]); + if (tokens.length === 3) + return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); + return false; +} +function parseFindNamePatterns(tokens) { + const patterns = []; + for (let i = 2; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-type") { + i += 1; + continue; + } + if (token === "-o") + continue; + if (token === "-name") { + const pattern = tokens[i + 1]; + if (!pattern) + return null; + patterns.push(pattern); + i += 1; + continue; + } + return null; + } + return patterns.length > 0 ? patterns : null; +} +function parseCompiledSegment(segment) { + const { clean, ignoreMissing } = stripAllowedModifiers(segment); + if (hasUnsupportedRedirection(clean)) + return null; + const pipeline = splitTopLevel(clean, ["|"]); + if (!pipeline || pipeline.length === 0) + return null; + const tokens = tokenizeShellWords(pipeline[0]); + if (!tokens || tokens.length === 0) + return null; + if (tokens[0] === "echo" && pipeline.length === 1) { + const text = tokens.slice(1).join(" "); + return { kind: "echo", text }; + } + if (tokens[0] === "cat") { + const paths = tokens.slice(1).flatMap(expandBraceToken); + if (paths.length === 0) + return null; + let lineLimit = 0; + let fromEnd = false; + let countLines2 = false; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const pipeStage = pipeline[1].trim(); + if (/^wc\s+-l\s*$/.test(pipeStage)) { + if (paths.length !== 1) + return null; + countLines2 = true; + } else { + if (!isValidPipelineHeadTailStage(pipeStage)) + return null; + const headTail = parseHeadTailStage(pipeStage); + if (!headTail) + return null; + lineLimit = headTail.lineLimit; + fromEnd = headTail.fromEnd; + } + } + return { kind: "cat", paths, lineLimit, 
fromEnd, countLines: countLines2, ignoreMissing }; + } + if (tokens[0] === "head" || tokens[0] === "tail") { + if (pipeline.length !== 1) + return null; + const parsed = parseHeadTailStage(clean); + if (!parsed) + return null; + const headTokens = tokenizeShellWords(clean); + if (!headTokens) + return null; + if (headTokens[1] === "-n" && headTokens.length < 4 || /^-\d+$/.test(headTokens[1] ?? "") && headTokens.length < 3 || headTokens.length === 2 && /^-?\d+$/.test(headTokens[1] ?? "")) + return null; + const path = headTokens[headTokens.length - 1]; + if (path === "head" || path === "tail" || path === "-n") + return null; + return { + kind: "cat", + paths: expandBraceToken(path), + lineLimit: parsed.lineLimit, + fromEnd: parsed.fromEnd, + countLines: false, + ignoreMissing + }; + } + if (tokens[0] === "wc" && tokens[1] === "-l" && pipeline.length === 1 && tokens[2]) { + return { + kind: "cat", + paths: expandBraceToken(tokens[2]), + lineLimit: 0, + fromEnd: false, + countLines: true, + ignoreMissing + }; + } + if (tokens[0] === "ls" && pipeline.length === 1) { + const dirs = tokens.slice(1).filter((token) => !token.startsWith("-")).flatMap(expandBraceToken); + const longFormat = tokens.some((token) => token.startsWith("-") && token.includes("l")); + return { kind: "ls", dirs: dirs.length > 0 ? 
dirs : ["/"], longFormat }; + } + if (tokens[0] === "find") { + if (pipeline.length > 3) + return null; + const dir = tokens[1]; + if (!dir) + return null; + const patterns = parseFindNamePatterns(tokens); + if (!patterns) + return null; + const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); + if (countOnly) { + if (patterns.length !== 1) + return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + if (pipeline.length >= 2) { + const xargsTokens = tokenizeShellWords(pipeline[1].trim()); + if (!xargsTokens || xargsTokens[0] !== "xargs") + return null; + const xargsArgs = xargsTokens.slice(1); + while (xargsArgs[0] && xargsArgs[0].startsWith("-")) { + if (xargsArgs[0] === "-r") { + xargsArgs.shift(); + continue; + } + return null; + } + const grepCmd = xargsArgs.join(" "); + const grepParams2 = parseBashGrep(grepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 3) { + const headStage = pipeline[2].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } + if (patterns.length !== 1) + return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + const grepParams = parseBashGrep(clean); + if (grepParams) { + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "grep", params: grepParams, lineLimit }; + } + return null; +} +function parseCompiledBashCommand(cmd) { + if (cmd.includes("||")) + return null; + const segments = 
splitTopLevel(cmd, ["&&", ";", "\n"]); + if (!segments || segments.length === 0) + return null; + const parsed = segments.map(parseCompiledSegment); + if (parsed.some((segment) => segment === null)) + return null; + return parsed; +} +function applyLineWindow(content, lineLimit, fromEnd) { + if (lineLimit <= 0) + return content; + const lines = content.split("\n"); + return (fromEnd ? lines.slice(-lineLimit) : lines.slice(0, lineLimit)).join("\n"); +} +function countLines(content) { + return content === "" ? 0 : content.split("\n").length; +} +function renderDirectoryListing(dir, rows, longFormat) { + const entries = /* @__PURE__ */ new Map(); + const prefix = dir === "/" ? "/" : `${dir}/`; + for (const row of rows) { + const path = row["path"]; + if (!path.startsWith(prefix) && dir !== "/") + continue; + const rest = dir === "/" ? path.slice(1) : path.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? rest : rest.slice(0, slash); + if (!name) + continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) + entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: Number(row["size_bytes"] ?? 0) }); + } + } + if (entries.size === 0) + return `ls: cannot access '${dir}': No such file or directory`; + const lines = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? 
"/" : "")); + } + } + return lines.join("\n"); +} +async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, deps = {}) { + const { readVirtualPathContentsFn = readVirtualPathContents, listVirtualPathRowsForDirsFn = listVirtualPathRowsForDirs, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect } = deps; + const plan = parseCompiledBashCommand(cmd); + if (!plan) + return null; + const readPaths = [...new Set(plan.flatMap((segment) => segment.kind === "cat" ? segment.paths : []))]; + const listDirs = [...new Set(plan.flatMap((segment) => segment.kind === "ls" ? segment.dirs.map((dir) => dir.replace(/\/+$/, "") || "/") : []))]; + const contentMap = readPaths.length > 0 ? await readVirtualPathContentsFn(api, memoryTable, sessionsTable, readPaths) : /* @__PURE__ */ new Map(); + const dirRowsMap = listDirs.length > 0 ? await listVirtualPathRowsForDirsFn(api, memoryTable, sessionsTable, listDirs) : /* @__PURE__ */ new Map(); + const outputs = []; + for (const segment of plan) { + if (segment.kind === "echo") { + outputs.push(segment.text); + continue; + } + if (segment.kind === "cat") { + const contents = []; + for (const path of segment.paths) { + const content = contentMap.get(path) ?? null; + if (content === null) { + if (segment.ignoreMissing) + continue; + return null; + } + contents.push(content); + } + const combined = contents.join(""); + if (segment.countLines) { + outputs.push(`${countLines(combined)} ${segment.paths[0]}`); + } else { + outputs.push(applyLineWindow(combined, segment.lineLimit, segment.fromEnd)); + } + continue; + } + if (segment.kind === "ls") { + for (const dir of segment.dirs) { + outputs.push(renderDirectoryListing(dir.replace(/\/+$/, "") || "/", dirRowsMap.get(dir.replace(/\/+$/, "") || "/") ?? 
[], segment.longFormat)); + } + continue; + } + if (segment.kind === "find") { + const filenamePattern = sqlLike(segment.pattern).replace(/\*/g, "%").replace(/\?/g, "_"); + const paths = await findVirtualPathsFn(api, memoryTable, sessionsTable, segment.dir.replace(/\/+$/, "") || "/", filenamePattern); + outputs.push(segment.countOnly ? String(paths.length) : paths.join("\n") || "(no matches)"); + continue; + } + if (segment.kind === "find_grep") { + const dir = segment.dir.replace(/\/+$/, "") || "/"; + const candidateBatches = await Promise.all(segment.patterns.map((pattern) => findVirtualPathsFn(api, memoryTable, sessionsTable, dir, sqlLike(pattern).replace(/\*/g, "%").replace(/\?/g, "_")))); + const candidatePaths = [...new Set(candidateBatches.flat())]; + if (candidatePaths.length === 0) { + outputs.push("(no matches)"); + continue; + } + const candidateContents = await readVirtualPathContentsFn(api, memoryTable, sessionsTable, candidatePaths); + const matched = refineGrepMatches(candidatePaths.flatMap((path) => { + const content = candidateContents.get(path); + if (content === null || content === void 0) + return []; + return [{ path, content: normalizeContent(path, content) }]; + }), segment.params); + const limited = segment.lineLimit > 0 ? 
matched.slice(0, segment.lineLimit) : matched; + outputs.push(limited.join("\n") || "(no matches)"); + continue; + } + if (segment.kind === "grep") { + const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); + if (result === null) + return null; + if (segment.lineLimit > 0) { + outputs.push(result.split("\n").slice(0, segment.lineLimit).join("\n")); + } else { + outputs.push(result); + } + continue; + } + } + return capOutputForClaude(outputs.join("\n"), { kind: "bash" }); +} + +// dist/src/hooks/query-cache.js +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var log3 = (msg) => log("query-cache", msg); +var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", "query-cache"); +var INDEX_CACHE_FILE = "index.md"; +function getSessionQueryCacheDir(sessionId, deps = {}) { + const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; + return join4(cacheRoot, sessionId); +} +function readCachedIndexContent(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + } catch (e) { + if (e?.code === "ENOENT") + return null; + logFn(`read failed for session=${sessionId}: ${e.message}`); + return null; + } +} +function writeCachedIndexContent(sessionId, content, deps = {}) { + const { logFn = log3 } = deps; + try { + const dir = getSessionQueryCacheDir(sessionId, deps); + mkdirSync2(dir, { recursive: true }); + writeFileSync2(join4(dir, INDEX_CACHE_FILE), content, "utf-8"); + } catch (e) { + logFn(`write failed for session=${sessionId}: ${e.message}`); + } +} + +// dist/src/hooks/memory-path-utils.js +import { homedir as homedir4 } from "node:os"; +import { join as join5 } from "node:path"; +var MEMORY_PATH = join5(homedir4(), ".deeplake", "memory"); var TILDE_PATH = 
"~/.deeplake/memory"; var HOME_VAR_PATH = "$HOME/.deeplake/memory"; -var __bundleDir = dirname(fileURLToPath(import.meta.url)); -var SHELL_BUNDLE = existsSync2(join3(__bundleDir, "shell", "deeplake-shell.js")) ? join3(__bundleDir, "shell", "deeplake-shell.js") : join3(__bundleDir, "..", "shell", "deeplake-shell.js"); var SAFE_BUILTINS = /* @__PURE__ */ new Set([ - // filesystem "cat", "ls", "cp", @@ -744,7 +1748,6 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "du", "tree", "file", - // text processing "grep", "egrep", "fgrep", @@ -771,31 +1774,24 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "diff", "strings", "split", - // search "find", "xargs", "which", - // data formats "jq", "yq", "xan", "base64", "od", - // archives "tar", "gzip", "gunzip", "zcat", - // hashing "md5sum", "sha1sum", "sha256sum", - // output/io "echo", "printf", "tee", - "cat", - // path/env "pwd", "cd", "basename", @@ -804,7 +1800,6 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "printenv", "hostname", "whoami", - // misc "date", "seq", "expr", @@ -819,7 +1814,6 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "history", "help", "clear", - // shell control flow "for", "while", "do", @@ -849,6 +1843,39 @@ function touchesMemory(p) { function rewritePaths(cmd) { return cmd.replace(new RegExp(MEMORY_PATH.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/?", "g"), "/").replace(/~\/.deeplake\/memory\/?/g, "/").replace(/\$HOME\/.deeplake\/memory\/?/g, "/").replace(/"\$HOME\/.deeplake\/memory\/?"/g, '"/"'); } + +// dist/src/hooks/pre-tool-use.js +var log4 = (msg) => log("pre", msg); +var __bundleDir = dirname(fileURLToPath2(import.meta.url)); +var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? 
join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); +var READ_CACHE_ROOT = join6(homedir5(), ".deeplake", "query-cache"); +function writeReadCacheFile(sessionId, virtualPath, content, deps = {}) { + const { cacheRoot = READ_CACHE_ROOT } = deps; + const safeSessionId = sessionId.replace(/[^a-zA-Z0-9._-]/g, "_") || "unknown"; + const rel = virtualPath.replace(/^\/+/, "") || "content"; + const expectedRoot = join6(cacheRoot, safeSessionId, "read"); + const absPath = join6(expectedRoot, rel); + if (absPath !== expectedRoot && !absPath.startsWith(expectedRoot + sep)) { + throw new Error(`writeReadCacheFile: path escapes cache root: ${absPath}`); + } + mkdirSync3(dirname(absPath), { recursive: true }); + writeFileSync3(absPath, content, "utf-8"); + return absPath; +} +function buildReadDecision(file_path, description) { + return { command: "", description, file_path }; +} +function getReadTargetPath(toolInput) { + const rawPath = toolInput.file_path ?? toolInput.path; + return rawPath ? rawPath : null; +} +function isLikelyDirectoryPath(virtualPath) { + const normalized = virtualPath.replace(/\/+$/, "") || "/"; + if (normalized === "/") + return true; + const base = normalized.split("/").pop() ?? ""; + return !base.includes("."); +} function getShellCommand(toolName, toolInput) { switch (toolName) { case "Grep": { @@ -865,10 +1892,10 @@ function getShellCommand(toolName, toolInput) { break; } case "Read": { - const fp = toolInput.file_path; + const fp = getReadTargetPath(toolInput); if (fp && touchesMemory(fp)) { - const virtualPath = rewritePaths(fp) || "/"; - return `cat ${virtualPath}`; + const rewritten = rewritePaths(fp) || "/"; + return `${isLikelyDirectoryPath(rewritten) ? 
"ls" : "cat"} ${rewritten}`; } break; } @@ -876,34 +1903,24 @@ function getShellCommand(toolName, toolInput) { const cmd = toolInput.command; if (!cmd || !touchesMemory(cmd)) break; - { - const rewritten = rewritePaths(cmd); - if (!isSafe(rewritten)) { - log3(`unsafe command blocked: ${rewritten}`); - return null; - } - return rewritten; + const rewritten = rewritePaths(cmd); + if (!isSafe(rewritten)) { + log4(`unsafe command blocked: ${rewritten}`); + return null; } - break; + return rewritten; } case "Glob": { const p = toolInput.path; - if (p && touchesMemory(p)) { - return `ls /`; - } + if (p && touchesMemory(p)) + return "ls /"; break; } } return null; } -function emitResult(command, description) { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { command, description } - } - })); +function buildAllowDecision(command, description) { + return { command, description }; } function extractGrepParams(toolName, toolInput, shellCmd) { if (toolName === "Grep") { @@ -924,234 +1941,234 @@ function extractGrepParams(toolName, toolInput, shellCmd) { return parseBashGrep(shellCmd); return null; } -async function main() { - const input = await readStdin(); - log3(`hook fired: tool=${input.tool_name} input=${JSON.stringify(input.tool_input)}`); +function buildFallbackDecision(shellCmd, shellBundle = SHELL_BUNDLE) { + return buildAllowDecision(`node "${shellBundle}" -c "${shellCmd.replace(/"/g, '\\"')}"`, `[DeepLake shell] ${shellCmd}`); +} +async function processPreToolUse(input, deps = {}) { + const { config = loadConfig(), createApi = (table2, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table2), executeCompiledBashCommandFn = executeCompiledBashCommand, handleGrepDirectFn = handleGrepDirect, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn 
= listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, writeReadCacheFileFn = writeReadCacheFile, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input.command ?? ""; const shellCmd = getShellCommand(input.tool_name, input.tool_input); - const toolPath = input.tool_input.file_path ?? input.tool_input.path ?? ""; + const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; - log3(`unsupported command, returning guidance: ${cmd}`); - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(guidance)}`, - description: "[DeepLake] unsupported command \u2014 rewrite using bash builtins" - } - } - })); - return; + logFn(`unsupported command, returning guidance: ${cmd}`); + return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); } if (!shellCmd) - return; - const config = loadConfig(); - if (config) { - const table = process.env["HIVEMIND_TABLE"] ?? "memory"; - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? 
"sessions"; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - try { - const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); - if (grepParams) { - log3(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); - const result = await handleGrepDirect(api, table, sessionsTable, grepParams); - if (result !== null) { - emitResult(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); - return; - } + return null; + if (!config) + return buildFallbackDecision(shellCmd, shellBundle); + const table = process.env["HIVEMIND_TABLE"] ?? "memory"; + const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; + const api = createApi(table, config); + const readVirtualPathContentsWithCache = async (cachePaths) => { + const uniquePaths = [...new Set(cachePaths)]; + const result = new Map(uniquePaths.map((path) => [path, null])); + const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const remainingPaths = cachedIndex === null ? 
uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); + if (cachedIndex !== null) { + result.set("/index.md", cachedIndex); + } + if (remainingPaths.length > 0) { + const fetched = await readVirtualPathContentsFn(api, table, sessionsTable, remainingPaths); + for (const [path, content] of fetched) + result.set(path, content); + } + const fetchedIndex = result.get("/index.md"); + if (typeof fetchedIndex === "string") { + writeCachedIndexContentFn(input.session_id, fetchedIndex); + } + return result; + }; + try { + if (input.tool_name === "Bash") { + const compiled = await executeCompiledBashCommandFn(api, table, sessionsTable, shellCmd, { + readVirtualPathContentsFn: async (_api, _memoryTable, _sessionsTable, cachePaths) => readVirtualPathContentsWithCache(cachePaths) + }); + if (compiled !== null) { + return buildAllowDecision(`echo ${JSON.stringify(compiled)}`, `[DeepLake compiled] ${shellCmd}`); } - { - let virtualPath = null; - let lineLimit = 0; - let fromEnd = false; - if (input.tool_name === "Read") { - virtualPath = rewritePaths(input.tool_input.file_path ?? ""); - } else if (input.tool_name === "Bash") { - const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); - const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); - if (catPipeHead) { - virtualPath = catPipeHead[1]; - lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); - } - if (!virtualPath) { - const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); - if (catMatch) - virtualPath = catMatch[1]; - } - if (!virtualPath) { - const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? shellCmd.match(/^head\s+(\S+)\s*$/); - if (headMatch) { - if (headMatch[2]) { - virtualPath = headMatch[2]; - lineLimit = Math.abs(parseInt(headMatch[1], 10)); - } else { - virtualPath = headMatch[1]; - lineLimit = 10; - } - } - } - if (!virtualPath) { - const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
shellCmd.match(/^tail\s+(\S+)\s*$/); - if (tailMatch) { - fromEnd = true; - if (tailMatch[2]) { - virtualPath = tailMatch[2]; - lineLimit = Math.abs(parseInt(tailMatch[1], 10)); - } else { - virtualPath = tailMatch[1]; - lineLimit = 10; - } - } - } - if (!virtualPath) { - const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); - if (wcMatch) { - virtualPath = wcMatch[1]; - lineLimit = -1; - } + } + const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); + if (grepParams) { + logFn(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirectFn(api, table, sessionsTable, grepParams); + if (result !== null) + return buildAllowDecision(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); + } + let virtualPath = null; + let lineLimit = 0; + let fromEnd = false; + let lsDir = null; + let longFormat = false; + if (input.tool_name === "Read") { + virtualPath = rewritePaths(getReadTargetPath(input.tool_input) ?? ""); + if (virtualPath && isLikelyDirectoryPath(virtualPath)) { + lsDir = virtualPath.replace(/\/+$/, "") || "/"; + virtualPath = null; + } + } else if (input.tool_name === "Bash") { + const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { + virtualPath = catPipeHead[1]; + lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); + } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) + virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
shellCmd.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { + virtualPath = headMatch[2]; + lineLimit = Math.abs(parseInt(headMatch[1], 10)); + } else { + virtualPath = headMatch[1]; + lineLimit = 10; } } - if (virtualPath && !virtualPath.endsWith("/")) { - log3(`direct read: ${virtualPath}`); - let content = null; - if (virtualPath.startsWith("/sessions/")) { - try { - const sessionRows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (sessionRows.length > 0 && sessionRows[0]["content"]) { - content = sessionRows[0]["content"]; - } - } catch { - } + } + if (!virtualPath) { + const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? shellCmd.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { + virtualPath = tailMatch[2]; + lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } else { - const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["summary"]) { - content = rows[0]["summary"]; - } else if (virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"]; - const proj = r["project"] || ""; - const desc = (r["description"] || "").slice(0, 120); - const date = (r["creation_date"] || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? 
`[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); - } - } - if (content !== null) { - if (lineLimit === -1) { - const count = content.split("\n").length; - emitResult(`echo ${JSON.stringify(`${count} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); - return; - } - if (lineLimit > 0) { - const lines = content.split("\n"); - content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); - } - const label = lineLimit > 0 ? fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}` : "cat"; - emitResult(`echo ${JSON.stringify(content)}`, `[DeepLake direct] ${label} ${virtualPath}`); - return; + virtualPath = tailMatch[1]; + lineLimit = 10; } } } - { - let lsDir = null; - let longFormat = false; - if (input.tool_name === "Glob") { - lsDir = rewritePaths(input.tool_input.path ?? "") || "/"; - } else if (input.tool_name === "Bash") { - const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); - if (lsMatch) { - lsDir = lsMatch[2] ?? "/"; - longFormat = (lsMatch[1] ?? "").includes("l"); - } + if (!virtualPath) { + const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { + virtualPath = wcMatch[1]; + lineLimit = -1; } - if (lsDir) { - const dir = lsDir.replace(/\/+$/, "") || "/"; - log3(`direct ls: ${dir}`); - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const isRoot = dir === "/"; - const lsQueries = []; - if (!isSessionDir) { - lsQueries.push(api.query(`SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path`).catch(() => [])); - } - if (isSessionDir || isRoot) { - lsQueries.push(api.query(`SELECT path, size_bytes FROM "${sessionsTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path`).catch(() => [])); - } - const rows = (await Promise.all(lsQueries)).flat(); - const entries = /* @__PURE__ */ new Map(); - const prefix = dir === "/" ? 
"/" : dir + "/"; - for (const row of rows) { - const p = row["path"]; - if (!p.startsWith(prefix) && dir !== "/") - continue; - const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); - const slash = rest.indexOf("/"); - const name = slash === -1 ? rest : rest.slice(0, slash); - if (!name) - continue; - const existing = entries.get(name); - if (slash !== -1) { - if (!existing) - entries.set(name, { isDir: true, size: 0 }); - } else { - entries.set(name, { isDir: false, size: row["size_bytes"] ?? 0 }); - } - } - const lines = []; - for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { - if (longFormat) { - const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; - const size = String(info.isDir ? 0 : info.size).padStart(6); - lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); - } else { - lines.push(name + (info.isDir ? "/" : "")); - } - } - emitResult(`echo ${JSON.stringify(lines.join("\n") || "(empty directory)")}`, `[DeepLake direct] ls ${dir}`); - return; + } + } + if (virtualPath && !virtualPath.endsWith("/")) { + logFn(`direct read: ${virtualPath}`); + let content = virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; + if (content === null) { + content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); + } + if (content !== null) { + if (virtualPath === "/index.md") { + writeCachedIndexContentFn(input.session_id, content); } + if (lineLimit === -1) + return buildAllowDecision(`echo ${JSON.stringify(`${content.split("\n").length} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + const label = lineLimit > 0 ? fromEnd ? 
`tail -${lineLimit}` : `head -${lineLimit}` : "cat"; + if (input.tool_name === "Read") { + const file_path = writeReadCacheFileFn(input.session_id, virtualPath, content); + return buildReadDecision(file_path, `[DeepLake direct] ${label} ${virtualPath}`); + } + const capped = capOutputForClaude(content, { kind: label }); + return buildAllowDecision(`echo ${JSON.stringify(capped)}`, `[DeepLake direct] ${label} ${virtualPath}`); } - if (input.tool_name === "Bash") { - const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); - if (findMatch) { - const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); - log3(`direct find: ${dir} -name '${findMatch[2]}'`); - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const findTable = isSessionDir ? sessionsTable : table; - const rows = await api.query(`SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path`); - let result = rows.map((r) => r["path"]).join("\n") || ""; - if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) { - result = String(rows.length); - } - emitResult(`echo ${JSON.stringify(result || "(no matches)")}`, `[DeepLake direct] find ${dir}`); - return; + } + if (!lsDir && input.tool_name === "Glob") { + lsDir = rewritePaths(input.tool_input.path ?? "") || "/"; + } else if (input.tool_name === "Bash") { + const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); + if (lsMatch) { + lsDir = lsMatch[2] ?? "/"; + longFormat = (lsMatch[1] ?? "").includes("l"); + } + } + if (lsDir) { + const dir = lsDir.replace(/\/+$/, "") || "/"; + logFn(`direct ls: ${dir}`); + const rows = await listVirtualPathRowsFn(api, table, sessionsTable, dir); + const entries = /* @__PURE__ */ new Map(); + const prefix = dir === "/" ? 
"/" : dir + "/"; + for (const row of rows) { + const p = row["path"]; + if (!p.startsWith(prefix) && dir !== "/") + continue; + const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? rest : rest.slice(0, slash); + if (!name) + continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) + entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: row["size_bytes"] ?? 0 }); } } - } catch (e) { - log3(`direct query failed, falling back to shell: ${e.message}`); + const lines = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? "/" : "")); + } + } + const lsOutput = capOutputForClaude(lines.join("\n") || "(empty directory)", { kind: "ls" }); + return buildAllowDecision(`echo ${JSON.stringify(lsOutput)}`, `[DeepLake direct] ls ${dir}`); } + if (input.tool_name === "Bash") { + const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + logFn(`direct find: ${dir} -name '${findMatch[2]}'`); + const paths = await findVirtualPathsFn(api, table, sessionsTable, dir, namePattern); + let result = paths.join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) + result = String(paths.length); + const capped = capOutputForClaude(result || "(no matches)", { kind: "find" }); + return buildAllowDecision(`echo ${JSON.stringify(capped)}`, `[DeepLake direct] find ${dir}`); + } + } + } catch (e) { + logFn(`direct query failed, falling back to shell: ${e.message}`); } 
- log3(`intercepted \u2192 rewriting to shell: ${shellCmd}`); - const rewrittenCommand = `node "${SHELL_BUNDLE}" -c "${shellCmd.replace(/"/g, '\\"')}"`; - const output = { + return buildFallbackDecision(shellCmd, shellBundle); +} +async function main() { + const input = await readStdin(); + const decision = await processPreToolUse(input); + if (!decision) + return; + const updatedInput = decision.file_path !== void 0 ? { file_path: decision.file_path } : { command: decision.command, description: decision.description }; + console.log(JSON.stringify({ hookSpecificOutput: { hookEventName: "PreToolUse", permissionDecision: "allow", - updatedInput: { - command: rewrittenCommand, - description: `[DeepLake] ${shellCmd}` - } + updatedInput } - }; - log3(`rewritten: ${rewrittenCommand}`); - console.log(JSON.stringify(output)); + })); } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log4(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + buildAllowDecision, + buildReadDecision, + extractGrepParams, + getShellCommand, + isSafe, + processPreToolUse, + rewritePaths, + touchesMemory, + writeReadCacheFile +}; diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index bec63e9..c0f05cc 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -2,7 +2,7 @@ // dist/src/hooks/session-start-setup.js import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join6 } from "node:path"; +import { dirname as dirname2, join as join7 } from "node:path"; import { execSync as execSync2 } from "node:child_process"; import { homedir as homedir4 } from "node:os"; @@ -66,6 +66,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync 
as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -90,27 +93,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -263,8 +295,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join4(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, 
suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync2(getIndexMarkerDir(), { recursive: true }); + writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -275,22 +360,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -300,6 +388,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -309,7 +399,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -331,22 +424,22 @@ function readStdin() { } // dist/src/utils/version-check.js -import { readFileSync as readFileSync3 } from "node:fs"; -import { dirname, join as join4 } from "node:path"; +import { readFileSync as readFileSync4 } from "node:fs"; +import { dirname, join as join5 } from "node:path"; var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join4(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync3(pluginJson, "utf-8")); + const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join4(dir, "package.json"); + const candidate = join5(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { @@ -377,15 
+470,15 @@ function isNewer(latest, current) { } // dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join5 } from "node:path"; +import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join6 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join5(hooksDir, filename); + const path = join6(hooksDir, filename); return { path, log(msg) { try { - mkdirSync2(hooksDir, { recursive: true }); + mkdirSync3(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -397,7 +490,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/session-start-setup.js var log3 = (msg) => log("session-setup", msg); var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var { log: wikiLog } = makeWikiLogger(join6(homedir4(), ".claude", "hooks")); +var { log: wikiLog } = makeWikiLogger(join7(homedir4(), ".claude", "hooks")); async function main() { if (process.env.HIVEMIND_WIKI_WORKER === "1") return; diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index f136de0..1f815ee 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -2,7 +2,7 @@ // dist/src/hooks/session-start.js import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join6 } from "node:path"; +import { dirname as dirname2, join as join7 } from "node:path"; import { readdirSync, rmSync } from "node:fs"; import { execSync as execSync2 } from "node:child_process"; import { homedir as homedir4 } from "node:os"; @@ -67,6 +67,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs"; +import { join 
as join4 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -91,27 +94,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -264,8 +296,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join4(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync2(getIndexMarkerDir(), { recursive: true }); + writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -276,22 +361,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -301,6 +389,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -310,7 +400,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -332,22 +425,22 @@ function readStdin() { } // dist/src/utils/version-check.js -import { readFileSync as readFileSync3 } from "node:fs"; -import { dirname, join as join4 } from "node:path"; +import { readFileSync as readFileSync4 } from "node:fs"; +import { dirname, join as join5 } from "node:path"; var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join4(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync3(pluginJson, "utf-8")); + const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join4(dir, "package.json"); + const candidate = join5(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { @@ -378,15 
+471,15 @@ function isNewer(latest, current) { } // dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join5 } from "node:path"; +import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join6 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join5(hooksDir, filename); + const path = join6(hooksDir, filename); return { path, log(msg) { try { - mkdirSync2(hooksDir, { recursive: true }); + mkdirSync3(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -398,7 +491,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/session-start.js var log3 = (msg) => log("session-start", msg); var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var AUTH_CMD = join6(__bundleDir, "commands", "auth-login.js"); +var AUTH_CMD = join7(__bundleDir, "commands", "auth-login.js"); var context = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: 1. Your built-in memory (~/.claude/) \u2014 personal per-project notes @@ -430,7 +523,7 @@ LIMITS: Do NOT spawn subagents to read deeplake memory. 
If a file returns empty Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; var HOME = homedir4(); -var { log: wikiLog } = makeWikiLogger(join6(HOME, ".claude", "hooks")); +var { log: wikiLog } = makeWikiLogger(join7(HOME, ".claude", "hooks")); async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { const summaryPath = `/summaries/${userName}/${sessionId}.md`; const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); @@ -508,11 +601,11 @@ async function main() { const cmd = scopes.map((s) => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null || true`).join("; "); execSync2(cmd, { stdio: "ignore", timeout: 6e4 }); try { - const cacheParent = join6(homedir4(), ".claude", "plugins", "cache", "hivemind", "hivemind"); + const cacheParent = join7(homedir4(), ".claude", "plugins", "cache", "hivemind", "hivemind"); const entries = readdirSync(cacheParent, { withFileTypes: true }); for (const e of entries) { if (e.isDirectory() && e.name !== latest) { - rmSync(join6(cacheParent, e.name), { recursive: true, force: true }); + rmSync(join7(cacheParent, e.name), { recursive: true, force: true }); log3(`cache cleanup: removed old version ${e.name}`); } } diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 2d0b237..0793149 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -46081,14 +46081,14 @@ var require_turndown_cjs = __commonJS({ } else if (node.nodeType === 1) { replacement = replacementForNode.call(self2, node); } - return join6(output, replacement); + return join7(output, replacement); }, ""); } function postProcess(output) { var self2 = this; this.rules.forEach(function(rule) { if (typeof rule.append === "function") { - output = join6(output, rule.append(self2.options)); + output = join7(output, 
rule.append(self2.options)); } }); return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, ""); @@ -46100,7 +46100,7 @@ var require_turndown_cjs = __commonJS({ if (whitespace.leading || whitespace.trailing) content = content.trim(); return whitespace.leading + rule.replacement(content, node, this.options) + whitespace.trailing; } - function join6(output, replacement) { + function join7(output, replacement) { var s12 = trimTrailingNewlines(output); var s22 = trimLeadingNewlines(replacement); var nls = Math.max(output.length - s12.length, replacement.length - s22.length); @@ -66758,6 +66758,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join6 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -66782,27 +66785,48 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? 
process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms3) { return new Promise((resolve5) => setTimeout(resolve5, ms3)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i11) => [col, row[i11]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); 
await sleep(delay); @@ -66955,8 +66987,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join6(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e6.message}`); + } + } /** List all tables in the workspace (with retry). 
*/ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -66967,22 +67052,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t6) => t6.table_name); + return { + tables: (data.tables ?? []).map((t6) => t6.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. 
*/ async ensureTable(name) { @@ -66992,6 +67080,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ @@ -67001,673 +67091,1090 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; // dist/src/shell/deeplake-fs.js import { basename as basename4, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; -var BATCH_SIZE = 10; -var FLUSH_DEBOUNCE_MS = 200; -function normPath(p22) { - const r10 = posix.normalize(p22.startsWith("/") ? p22 : "/" + p22); - return r10 === "/" ? 
r10 : r10.replace(/\/$/, ""); + +// dist/src/shell/grep-core.js +var TOOL_INPUT_FIELDS = [ + "command", + "file_path", + "path", + "pattern", + "prompt", + "subagent_type", + "query", + "url", + "notebook_path", + "old_string", + "new_string", + "content", + "skill", + "args", + "taskId", + "status", + "subject", + "description", + "to", + "message", + "summary", + "max_results" +]; +var TOOL_RESPONSE_DROP = /* @__PURE__ */ new Set([ + // Note: `stderr` is intentionally NOT in this set. The `stdout` high-signal + // branch below already de-dupes it for the common case (appends as suffix + // when non-empty). If a tool response has ONLY `stderr` and no `stdout` + // (hard-failure on some tools), the generic cleanup preserves it so the + // error message reaches Claude instead of collapsing to `[ok]`. + "interrupted", + "isImage", + "noOutputExpected", + "type", + "structuredPatch", + "userModified", + "originalFile", + "replaceAll", + "totalDurationMs", + "totalTokens", + "totalToolUseCount", + "usage", + "toolStats", + "durationMs", + "durationSeconds", + "bytes", + "code", + "codeText", + "agentId", + "agentType", + "verificationNudgeNeeded", + "numLines", + "numFiles", + "truncated", + "statusChange", + "updatedFields", + "isAgent", + "success" +]); +function maybeParseJson(v27) { + if (typeof v27 !== "string") + return v27; + const s10 = v27.trim(); + if (s10[0] !== "{" && s10[0] !== "[") + return v27; + try { + return JSON.parse(s10); + } catch { + return v27; + } } -function parentOf(p22) { - const i11 = p22.lastIndexOf("/"); - return i11 <= 0 ? "/" : p22.slice(0, i11); +function snakeCase(k17) { + return k17.replace(/([A-Z])/g, "_$1").toLowerCase(); } -function guessMime(filename) { - const ext2 = filename.split(".").pop()?.toLowerCase() ?? ""; - return { - json: "application/json", - md: "text/markdown", - txt: "text/plain", - js: "text/javascript", - ts: "text/typescript", - html: "text/html", - css: "text/css" - }[ext2] ?? 
"text/plain"; +function camelCase(k17) { + return k17.replace(/_([a-z])/g, (_16, c15) => c15.toUpperCase()); } -function fsErr(code, msg, path2) { - return Object.assign(new Error(`${code}: ${msg}, '${path2}'`), { code }); +function formatToolInput(raw) { + const p22 = maybeParseJson(raw); + if (typeof p22 !== "object" || p22 === null) + return String(p22 ?? ""); + const parts = []; + for (const k17 of TOOL_INPUT_FIELDS) { + if (p22[k17] === void 0) + continue; + const v27 = p22[k17]; + parts.push(`${k17}: ${typeof v27 === "string" ? v27 : JSON.stringify(v27)}`); + } + for (const k17 of ["glob", "output_mode", "limit", "offset"]) { + if (p22[k17] !== void 0) + parts.push(`${k17}: ${p22[k17]}`); + } + return parts.length ? parts.join("\n") : JSON.stringify(p22); } -var DeeplakeFs = class _DeeplakeFs { - client; - table; - mountPoint; - // path → Buffer (content) or null (exists but not fetched yet) - files = /* @__PURE__ */ new Map(); - meta = /* @__PURE__ */ new Map(); - // dir path → Set of immediate child names - dirs = /* @__PURE__ */ new Map(); - // batched writes pending SQL flush - pending = /* @__PURE__ */ new Map(); - // paths that have been flushed (INSERT) at least once — subsequent flushes use UPDATE - flushed = /* @__PURE__ */ new Set(); - /** Number of files loaded from the server during bootstrap. */ - get fileCount() { - return this.files.size; +function formatToolResponse(raw, inp, toolName) { + const r10 = maybeParseJson(raw); + if (typeof r10 !== "object" || r10 === null) + return String(r10 ?? ""); + if (toolName === "Edit" || toolName === "Write" || toolName === "MultiEdit") { + return r10.filePath ? 
`[wrote ${r10.filePath}]` : "[ok]"; } - flushTimer = null; - // serialize flushes - flushChain = Promise.resolve(); - // Paths that live in the sessions table (multi-row, read by concatenation) - sessionPaths = /* @__PURE__ */ new Set(); - sessionsTable = null; - constructor(client, table, mountPoint) { - this.client = client; - this.table = table; - this.mountPoint = mountPoint; - this.dirs.set(mountPoint, /* @__PURE__ */ new Set()); - if (mountPoint !== "/") - this.dirs.set("/", /* @__PURE__ */ new Set([mountPoint.slice(1)])); + if (typeof r10.stdout === "string") { + const stderr = r10.stderr; + return r10.stdout + (stderr ? ` +stderr: ${stderr}` : ""); } - static async create(client, table, mount = "/memory", sessionsTable) { - const fs3 = new _DeeplakeFs(client, table, mount); - fs3.sessionsTable = sessionsTable ?? null; - await client.ensureTable(); - let sessionSyncOk = true; - const memoryBootstrap = (async () => { - const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; - try { - const rows = await client.query(sql); - for (const row of rows) { - const p22 = row["path"]; - fs3.files.set(p22, null); - fs3.meta.set(p22, { - size: Number(row["size_bytes"] ?? 0), - mime: row["mime_type"] ?? "application/octet-stream", - mtime: /* @__PURE__ */ new Date() - }); - fs3.addToTree(p22); - fs3.flushed.add(p22); - } - } catch { - } - })(); - const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => { - try { - const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`); - for (const row of sessionRows) { - const p22 = row["path"]; - if (!fs3.files.has(p22)) { - fs3.files.set(p22, null); - fs3.meta.set(p22, { - size: Number(row["total_size"] ?? 
0), - mime: "application/x-ndjson", - mtime: /* @__PURE__ */ new Date() - }); - fs3.addToTree(p22); - } - fs3.sessionPaths.add(p22); - } - } catch { - } - })() : Promise.resolve(); - await Promise.all([memoryBootstrap, sessionsBootstrap]); - return fs3; + if (typeof r10.content === "string") + return r10.content; + if (r10.file && typeof r10.file === "object") { + const f11 = r10.file; + if (typeof f11.content === "string") + return `[${f11.filePath ?? ""}] +${f11.content}`; + if (typeof f11.base64 === "string") + return `[binary ${f11.filePath ?? ""}: ${f11.base64.length} base64 chars]`; } - // ── tree management ─────────────────────────────────────────────────────── - addToTree(filePath) { - const segs = filePath.split("/").filter(Boolean); - for (let d15 = 0; d15 < segs.length; d15++) { - const dir = d15 === 0 ? "/" : "/" + segs.slice(0, d15).join("/"); - if (!this.dirs.has(dir)) - this.dirs.set(dir, /* @__PURE__ */ new Set()); - this.dirs.get(dir).add(segs[d15]); - } + if (Array.isArray(r10.filenames)) + return r10.filenames.join("\n"); + if (Array.isArray(r10.matches)) { + return r10.matches.map((m26) => typeof m26 === "string" ? m26 : JSON.stringify(m26)).join("\n"); } - removeFromTree(filePath) { - this.files.delete(filePath); - this.meta.delete(filePath); - this.pending.delete(filePath); - this.flushed.delete(filePath); - const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + if (Array.isArray(r10.results)) { + return r10.results.map((x28) => typeof x28 === "string" ? x28 : x28?.title ?? x28?.url ?? 
JSON.stringify(x28)).join("\n"); } - // ── flush / write batching ──────────────────────────────────────────────── - scheduleFlush() { - if (this.flushTimer !== null) - return; - this.flushTimer = setTimeout(() => { - this.flush().catch(() => { - }); - }, FLUSH_DEBOUNCE_MS); + const inpObj = maybeParseJson(inp); + const kept = {}; + for (const [k17, v27] of Object.entries(r10)) { + if (TOOL_RESPONSE_DROP.has(k17)) + continue; + if (v27 === "" || v27 === false || v27 == null) + continue; + if (typeof inpObj === "object" && inpObj) { + const inObj = inpObj; + if (k17 in inObj && JSON.stringify(inObj[k17]) === JSON.stringify(v27)) + continue; + const snake = snakeCase(k17); + if (snake in inObj && JSON.stringify(inObj[snake]) === JSON.stringify(v27)) + continue; + const camel = camelCase(k17); + if (camel in inObj && JSON.stringify(inObj[camel]) === JSON.stringify(v27)) + continue; + } + kept[k17] = v27; } - async flush() { - this.flushChain = this.flushChain.then(() => this._doFlush()); - return this.flushChain; + return Object.keys(kept).length ? JSON.stringify(kept) : "[ok]"; +} +function formatToolCall(obj) { + return `[tool:${obj?.tool_name ?? 
"?"}] +input: ${formatToolInput(obj?.tool_input)} +response: ${formatToolResponse(obj?.tool_response, obj?.tool_input, obj?.tool_name)}`; +} +function normalizeContent(path2, raw) { + if (!path2.includes("/sessions/")) + return raw; + if (!raw || raw[0] !== "{") + return raw; + let obj; + try { + obj = JSON.parse(raw); + } catch { + return raw; } - async _doFlush() { - if (this.pending.size === 0) - return; - if (this.flushTimer !== null) { - clearTimeout(this.flushTimer); - this.flushTimer = null; - } - const rows = [...this.pending.values()]; - this.pending.clear(); - const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10))); - let failures = 0; - for (let i11 = 0; i11 < results.length; i11++) { - if (results[i11].status === "rejected") { - if (!this.pending.has(rows[i11].path)) { - this.pending.set(rows[i11].path, rows[i11]); - } - failures++; - } - } - if (failures > 0) { - throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); + if (Array.isArray(obj.turns)) { + const header = []; + if (obj.date_time) + header.push(`date: ${obj.date_time}`); + if (obj.speakers) { + const s10 = obj.speakers; + const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); + if (names) + header.push(`speakers: ${names}`); } + const lines = obj.turns.map((t6) => { + const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); + const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); + const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; + return `${tag}${sp}: ${tx}`; + }); + const out2 = [...header, ...lines].join("\n"); + return out2.trim() ? out2 : raw; } - async upsertRow(r10) { - const text = sqlStr(r10.contentText); - const p22 = sqlStr(r10.path); - const fname = sqlStr(r10.filename); - const mime = sqlStr(r10.mimeType); - const ts3 = (/* @__PURE__ */ new Date()).toISOString(); - const cd = r10.creationDate ?? ts3; - const lud = r10.lastUpdateDate ?? 
ts3; - if (this.flushed.has(r10.path)) { - let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`; - if (r10.project !== void 0) - setClauses += `, project = '${sqlStr(r10.project)}'`; - if (r10.description !== void 0) - setClauses += `, description = '${sqlStr(r10.description)}'`; - await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`); - } else { - const id = randomUUID2(); - const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : ""); - const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : ""); - await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`); - this.flushed.add(r10.path); - } + const stripRecalled = (t6) => { + const i11 = t6.indexOf(""); + if (i11 === -1) + return t6; + const j14 = t6.lastIndexOf(""); + if (j14 === -1 || j14 < i11) + return t6; + const head = t6.slice(0, i11); + const tail = t6.slice(j14 + "".length); + return (head + tail).replace(/^\s+/, "").replace(/\n{3,}/g, "\n\n"); + }; + let out = null; + if (obj.type === "user_message") { + out = `[user] ${stripRecalled(String(obj.content ?? ""))}`; + } else if (obj.type === "assistant_message") { + const agent = obj.agent_type ? ` (agent=${obj.agent_type})` : ""; + out = `[assistant${agent}] ${stripRecalled(String(obj.content ?? 
""))}`; + } else if (obj.type === "tool_call") { + out = formatToolCall(obj); } - // ── Virtual index.md generation ──────────────────────────────────────────── - async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } + if (out === null) + return raw; + const trimmed = out.trim(); + if (!trimmed || trimmed === "[user]" || trimmed === "[assistant]" || /^\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}$/.test(trimmed)) + return raw; + return out; +} +function buildPathCondition(targetPath) { + if (!targetPath || targetPath === "/") + return ""; + const clean = targetPath.replace(/\/+$/, ""); + if (/[*?]/.test(clean)) { + const likePattern = sqlLike(clean).replace(/\*/g, "%").replace(/\?/g, "_"); + return `path LIKE '${likePattern}' ESCAPE '\\'`; + } + const base = clean.split("/").pop() ?? ""; + if (base.includes(".")) { + return `path = '${sqlStr(clean)}'`; + } + return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; +} +async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const limit = opts.limit ?? 100; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; + const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + return rows.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); +} +function buildPathFilter(targetPath) { + const condition = buildPathCondition(targetPath); + return condition ? ` AND ${condition}` : ""; +} +function buildPathFilterForTargets(targetPaths) { + if (targetPaths.some((targetPath) => !targetPath || targetPath === "/")) + return ""; + const conditions = [...new Set(targetPaths.map((targetPath) => buildPathCondition(targetPath)).filter((condition) => condition.length > 0))]; + if (conditions.length === 0) + return ""; + if (conditions.length === 1) + return ` AND ${conditions[0]}`; + return ` AND (${conditions.join(" OR ")})`; +} +function extractRegexLiteralPrefilter(pattern) { + if (!pattern) + return null; + const parts = []; + let current = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return null; + if (/[dDsSwWbBAZzGkKpP]/.test(next)) + return null; + current += next; + i11++; + continue; } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated 
| Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) + if (ch === ".") { + if (pattern[i11 + 1] === "*") { + if (current) + parts.push(current); + current = ""; + i11++; continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? `[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); + } + return null; } - lines.push(""); - return lines.join("\n"); + if ("|()[]{}+?^$".includes(ch) || ch === "*") + return null; + current += ch; } - // ── batch prefetch ──────────────────────────────────────────────────────── - /** - * Prefetch multiple files into the content cache with a single SQL query. - * Skips paths that are already cached, pending, or session-backed. - * After this call, subsequent readFile() calls for these paths hit cache. 
- */ - async prefetch(paths) { - const uncached = []; - for (const raw of paths) { - const p22 = normPath(raw); - if (this.files.get(p22) !== null && this.files.get(p22) !== void 0) - continue; - if (this.pending.has(p22)) - continue; - if (this.sessionPaths.has(p22)) - continue; - if (!this.files.has(p22)) - continue; - uncached.push(p22); - } - if (uncached.length === 0) - return; - const inList = uncached.map((p22) => `'${sqlStr(p22)}'`).join(", "); - const rows = await this.client.query(`SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})`); - for (const row of rows) { - const p22 = row["path"]; - const text = row["summary"] ?? ""; - this.files.set(p22, Buffer.from(text, "utf-8")); + if (current) + parts.push(current); + const literal = parts.reduce((best, part) => part.length > best.length ? part : best, ""); + return literal.length >= 2 ? literal : null; +} +function extractRegexAlternationPrefilters(pattern) { + if (!pattern.includes("|")) + return null; + const parts = []; + let current = ""; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + current += `\\${ch}`; + escaped = false; + continue; } - } - // ── IFileSystem: reads ──────────────────────────────────────────────────── - async readFileBuffer(path2) { - const p22 = normPath(path2); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (!this.files.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - const cached = this.files.get(p22); - if (cached !== null && cached !== void 0) - return cached; - const pend = this.pending.get(p22); - if (pend) { - const buf2 = Buffer.from(pend.contentText, "utf-8"); - this.files.set(p22, buf2); - return buf2; + if (ch === "\\") { + escaped = true; + continue; } - if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM 
"${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); - if (rows2.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text = rows2.map((r10) => typeof r10["message"] === "string" ? r10["message"] : JSON.stringify(r10["message"])).join("\n"); - const buf2 = Buffer.from(text, "utf-8"); - this.files.set(p22, buf2); - return buf2; + if (ch === "|") { + if (!current) + return null; + parts.push(current); + current = ""; + continue; } - const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); - if (rows.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const buf = Buffer.from(rows[0]["summary"] ?? "", "utf-8"); - this.files.set(p22, buf); - return buf; + if ("()[]{}^$".includes(ch)) + return null; + current += ch; } - async readFile(path2, _opts) { - const p22 = normPath(path2); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { - const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); - if (realRows.length > 0 && realRows[0]["summary"]) { - const text2 = realRows[0]["summary"]; - const buf2 = Buffer.from(text2, "utf-8"); - this.files.set(p22, buf2); - return text2; + if (escaped || !current) + return null; + parts.push(current); + const literals = [...new Set(parts.map((part) => extractRegexLiteralPrefilter(part)).filter((part) => typeof part === "string" && part.length >= 2))]; + return literals.length > 0 ? literals : null; +} +function buildGrepSearchOptions(params, targetPath) { + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; + const alternationPrefilters = hasRegexMeta ? 
extractRegexAlternationPrefilters(params.pattern) : null; + return { + pathFilter: buildPathFilter(targetPath), + contentScanOnly: hasRegexMeta, + likeOp: params.ignoreCase ? "ILIKE" : "LIKE", + escapedPattern: sqlLike(params.pattern), + prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + }; +} +function buildContentFilter(column, likeOp, patterns) { + if (patterns.length === 0) + return ""; + if (patterns.length === 1) + return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; + return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function compileGrepRegex(params) { + let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + if (params.wordMatch) + reStr = `\\b${reStr}\\b`; + try { + return new RegExp(reStr, params.ignoreCase ? "i" : ""); + } catch { + return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + } +} +function refineGrepMatches(rows, params, forceMultiFilePrefix) { + const re9 = compileGrepRegex(params); + const multi = forceMultiFilePrefix ?? rows.length > 1; + const output = []; + for (const row of rows) { + if (!row.content) + continue; + const lines = row.content.split("\n"); + const matched = []; + for (let i11 = 0; i11 < lines.length; i11++) { + const hit = re9.test(lines[i11]); + if (hit !== !!params.invertMatch) { + if (params.filesOnly) { + output.push(row.path); + break; + } + const prefix = multi ? `${row.path}:` : ""; + const ln3 = params.lineNumber ? 
`${i11 + 1}:` : ""; + matched.push(`${prefix}${ln3}${lines[i11]}`); } - return this.generateVirtualIndex(); } - if (!this.files.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - const cached = this.files.get(p22); - if (cached !== null && cached !== void 0) - return cached.toString("utf-8"); - const pend = this.pending.get(p22); - if (pend) - return pend.contentText; - if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); - if (rows2.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text2 = rows2.map((r10) => typeof r10["message"] === "string" ? r10["message"] : JSON.stringify(r10["message"])).join("\n"); - const buf2 = Buffer.from(text2, "utf-8"); - this.files.set(p22, buf2); - return text2; + if (!params.filesOnly) { + if (params.countOnly) { + output.push(`${multi ? `${row.path}:` : ""}${matched.length}`); + } else { + output.push(...matched); + } } - const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); - if (rows.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text = rows[0]["summary"] ?? ""; - const buf = Buffer.from(text, "utf-8"); - this.files.set(p22, buf); - return text; } - // ── IFileSystem: writes ─────────────────────────────────────────────────── - /** Write a file with optional row-level metadata (project, description, dates). */ - async writeFileWithMeta(path2, content, meta) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); - const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); - this.files.set(p22, buf); - this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); - this.addToTree(p22); - this.pending.set(p22, { - path: p22, - filename: basename4(p22), - contentText: text, - mimeType: mime, - sizeBytes: buf.length, - ...meta - }); - if (this.pending.size >= BATCH_SIZE) - await this.flush(); - else - this.scheduleFlush(); + return output; +} + +// dist/src/shell/deeplake-fs.js +var BATCH_SIZE = 10; +var PREFETCH_BATCH_SIZE = 50; +var FLUSH_DEBOUNCE_MS = 200; +function normPath(p22) { + const r10 = posix.normalize(p22.startsWith("/") ? p22 : "/" + p22); + return r10 === "/" ? r10 : r10.replace(/\/$/, ""); +} +function parentOf(p22) { + const i11 = p22.lastIndexOf("/"); + return i11 <= 0 ? "/" : p22.slice(0, i11); +} +function guessMime(filename) { + const ext2 = filename.split(".").pop()?.toLowerCase() ?? ""; + return { + json: "application/json", + md: "text/markdown", + txt: "text/plain", + js: "text/javascript", + ts: "text/typescript", + html: "text/html", + css: "text/css" + }[ext2] ?? "text/plain"; +} +function normalizeSessionMessage(path2, message) { + const raw = typeof message === "string" ? 
message : JSON.stringify(message); + return normalizeContent(path2, raw); +} +function joinSessionMessages(path2, messages) { + return messages.map((message) => normalizeSessionMessage(path2, message)).join("\n"); +} +function fsErr(code, msg, path2) { + return Object.assign(new Error(`${code}: ${msg}, '${path2}'`), { code }); +} +var DeeplakeFs = class _DeeplakeFs { + client; + table; + mountPoint; + // path → Buffer (content) or null (exists but not fetched yet) + files = /* @__PURE__ */ new Map(); + meta = /* @__PURE__ */ new Map(); + // dir path → Set of immediate child names + dirs = /* @__PURE__ */ new Map(); + // batched writes pending SQL flush + pending = /* @__PURE__ */ new Map(); + // paths that have been flushed (INSERT) at least once — subsequent flushes use UPDATE + flushed = /* @__PURE__ */ new Set(); + /** Number of files loaded from the server during bootstrap. */ + get fileCount() { + return this.files.size; } - async writeFile(path2, content, _opts) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); - const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); - this.files.set(p22, buf); - this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); - this.addToTree(p22); - this.pending.set(p22, { - path: p22, - filename: basename4(p22), - contentText: text, - mimeType: mime, - sizeBytes: buf.length - }); - if (this.pending.size >= BATCH_SIZE) - await this.flush(); - else - this.scheduleFlush(); + flushTimer = null; + // serialize flushes + flushChain = Promise.resolve(); + // Paths that live in the sessions table (multi-row, read by concatenation) + sessionPaths = /* @__PURE__ */ new Set(); + sessionsTable = null; + constructor(client, table, mountPoint) { + this.client = client; + this.table = table; + this.mountPoint = mountPoint; + this.dirs.set(mountPoint, /* @__PURE__ */ new Set()); + if (mountPoint !== "/") + this.dirs.set("/", /* @__PURE__ */ new Set([mountPoint.slice(1)])); } - async appendFile(path2, content, opts) { - const p22 = normPath(path2); - const add = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.files.has(p22) || await this.exists(p22).catch(() => false)) { - const ts3 = (/* @__PURE__ */ new Date()).toISOString(); - await this.client.query(`UPDATE "${this.table}" SET summary = summary || E'${sqlStr(add)}', size_bytes = size_bytes + ${Buffer.byteLength(add, "utf-8")}, last_update_date = '${ts3}' WHERE path = '${sqlStr(p22)}'`); - this.files.set(p22, null); - const m26 = this.meta.get(p22); - if (m26) { - m26.size += Buffer.byteLength(add, "utf-8"); - m26.mtime = new Date(ts3); + static async create(client, table, mount = "/memory", sessionsTable) { + const fs3 = new _DeeplakeFs(client, table, mount); + fs3.sessionsTable = sessionsTable ?? 
null; + await client.ensureTable(); + let sessionSyncOk = true; + const memoryBootstrap = (async () => { + const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; + try { + const rows = await client.query(sql); + for (const row of rows) { + const p22 = row["path"]; + fs3.files.set(p22, null); + fs3.meta.set(p22, { + size: Number(row["size_bytes"] ?? 0), + mime: row["mime_type"] ?? "application/octet-stream", + mtime: /* @__PURE__ */ new Date() + }); + fs3.addToTree(p22); + fs3.flushed.add(p22); + } + } catch { } - } else { - await this.writeFile(p22, content, opts); - await this.flush(); - } + })(); + const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => { + try { + const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`); + for (const row of sessionRows) { + const p22 = row["path"]; + if (!fs3.files.has(p22)) { + fs3.files.set(p22, null); + fs3.meta.set(p22, { + size: Number(row["total_size"] ?? 0), + mime: "application/x-ndjson", + mtime: /* @__PURE__ */ new Date() + }); + fs3.addToTree(p22); + } + fs3.sessionPaths.add(p22); + } + } catch { + } + })() : Promise.resolve(); + await Promise.all([memoryBootstrap, sessionsBootstrap]); + return fs3; } - // ── IFileSystem: metadata ───────────────────────────────────────────────── - async exists(path2) { - const p22 = normPath(path2); - if (p22 === "/index.md") - return true; - return this.files.has(p22) || this.dirs.has(p22); + // ── tree management ─────────────────────────────────────────────────────── + addToTree(filePath) { + const segs = filePath.split("/").filter(Boolean); + for (let d15 = 0; d15 < segs.length; d15++) { + const dir = d15 === 0 ? 
"/" : "/" + segs.slice(0, d15).join("/"); + if (!this.dirs.has(dir)) + this.dirs.set(dir, /* @__PURE__ */ new Set()); + this.dirs.get(dir).add(segs[d15]); + } } - async stat(path2) { - const p22 = normPath(path2); - const isFile = this.files.has(p22); - const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { - return { - isFile: true, - isDirectory: false, - isSymbolicLink: false, - mode: 420, - size: 0, - mtime: /* @__PURE__ */ new Date() - }; - } - if (!isFile && !isDir) - throw fsErr("ENOENT", "no such file or directory", p22); - const m26 = this.meta.get(p22); - return { - isFile: isFile && !isDir, - isDirectory: isDir, - isSymbolicLink: false, - mode: isDir ? 493 : 420, - size: m26?.size ?? 0, - mtime: m26?.mtime ?? /* @__PURE__ */ new Date() - }; - } - async lstat(path2) { - return this.stat(path2); - } - async chmod(_path, _mode) { - } - async utimes(_path, _atime, _mtime) { - } - async symlink(_target, linkPath) { - throw fsErr("EPERM", "operation not permitted", linkPath); - } - async link(_src, destPath) { - throw fsErr("EPERM", "operation not permitted", destPath); + removeFromTree(filePath) { + this.files.delete(filePath); + this.meta.delete(filePath); + this.pending.delete(filePath); + this.flushed.delete(filePath); + const parent = parentOf(filePath); + this.dirs.get(parent)?.delete(basename4(filePath)); } - async readlink(path2) { - throw fsErr("EINVAL", "invalid argument", path2); + // ── flush / write batching ──────────────────────────────────────────────── + scheduleFlush() { + if (this.flushTimer !== null) + return; + this.flushTimer = setTimeout(() => { + this.flush().catch(() => { + }); + }, FLUSH_DEBOUNCE_MS); } - async realpath(path2) { - const p22 = normPath(path2); - if (p22 === "/index.md") - return p22; - if (!this.files.has(p22) && !this.dirs.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - return p22; + async flush() { + this.flushChain = this.flushChain.then(() => this._doFlush()); + 
return this.flushChain; } - // ── IFileSystem: directories ────────────────────────────────────────────── - async mkdir(path2, opts) { - const p22 = normPath(path2); - if (this.files.has(p22)) - throw fsErr("EEXIST", "file exists", p22); - if (this.dirs.has(p22)) { - if (!opts?.recursive) - throw fsErr("EEXIST", "file exists", p22); + async _doFlush() { + if (this.pending.size === 0) return; + if (this.flushTimer !== null) { + clearTimeout(this.flushTimer); + this.flushTimer = null; } - if (!opts?.recursive) { - const parent2 = parentOf(p22); - if (!this.dirs.has(parent2)) - throw fsErr("ENOENT", "no such file or directory", parent2); + const rows = [...this.pending.values()]; + this.pending.clear(); + const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10))); + let failures = 0; + for (let i11 = 0; i11 < results.length; i11++) { + if (results[i11].status === "rejected") { + if (!this.pending.has(rows[i11].path)) { + this.pending.set(rows[i11].path, rows[i11]); + } + failures++; + } } - this.dirs.set(p22, /* @__PURE__ */ new Set()); - const parent = parentOf(p22); - if (!this.dirs.has(parent)) - this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); - } - async readdir(path2) { - const p22 = normPath(path2); - if (!this.dirs.has(p22)) - throw fsErr("ENOTDIR", "not a directory", p22); - const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { - entries.push("index.md"); + if (failures > 0) { + throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); } - return entries; } - async readdirWithFileTypes(path2) { - const names = await this.readdir(path2); - const p22 = normPath(path2); - return names.map((name) => { - const child = p22 === "/" ? 
`/${name}` : `${p22}/${name}`; - return { - name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), - isDirectory: this.dirs.has(child), - isSymbolicLink: false - }; - }); + async upsertRow(r10) { + const text = sqlStr(r10.contentText); + const p22 = sqlStr(r10.path); + const fname = sqlStr(r10.filename); + const mime = sqlStr(r10.mimeType); + const ts3 = (/* @__PURE__ */ new Date()).toISOString(); + const cd = r10.creationDate ?? ts3; + const lud = r10.lastUpdateDate ?? ts3; + if (this.flushed.has(r10.path)) { + let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`; + if (r10.project !== void 0) + setClauses += `, project = '${sqlStr(r10.project)}'`; + if (r10.description !== void 0) + setClauses += `, description = '${sqlStr(r10.description)}'`; + await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`); + } else { + const id = randomUUID2(); + const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : ""); + const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? 
`, '${sqlStr(r10.description)}'` : ""); + await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`); + this.flushed.add(r10.path); + } } - // ── IFileSystem: structural mutations ───────────────────────────────────── - async rm(path2, opts) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (!this.files.has(p22) && !this.dirs.has(p22)) { - if (opts?.force) - return; - throw fsErr("ENOENT", "no such file or directory", p22); + // ── Virtual index.md generation ──────────────────────────────────────────── + async generateVirtualIndex() { + const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); + const sessionPathsByKey = /* @__PURE__ */ new Map(); + for (const sp of this.sessionPaths) { + const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); + if (hivemind) { + sessionPathsByKey.set(hivemind[1], sp.slice(1)); + } else { + const fname = sp.split("/").pop() ?? ""; + const stem = fname.replace(/\.[^.]+$/, ""); + if (stem) + sessionPathsByKey.set(stem, sp.slice(1)); + } } - if (this.dirs.has(p22)) { - const children = this.dirs.get(p22) ?? /* @__PURE__ */ new Set(); - if (children.size > 0 && !opts?.recursive) - throw fsErr("ENOTEMPTY", "directory not empty", p22); - const toDelete = []; - const stack = [p22]; - while (stack.length) { - const cur = stack.pop(); - for (const child of [...this.dirs.get(cur) ?? []]) { - const childPath = cur === "/" ? 
`/${child}` : `${cur}/${child}`; - if (this.files.has(childPath)) - toDelete.push(childPath); - if (this.dirs.has(childPath)) - stack.push(childPath); - } + const lines = [ + "# Session Index", + "", + "List of all Claude Code sessions with summaries.", + "", + "| Session | Conversation | Created | Last Updated | Project | Description |", + "|---------|-------------|---------|--------------|---------|-------------|" + ]; + for (const row of rows) { + const p22 = row["path"]; + const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); + if (!match2) + continue; + const summaryUser = match2[1]; + const sessionId = match2[2]; + const relPath = `summaries/${summaryUser}/${sessionId}.md`; + const baseName = sessionId.replace(/_summary$/, ""); + const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); + const convLink = convPath ? `[messages](${convPath})` : ""; + const project = row["project"] || ""; + const description = row["description"] || ""; + const creationDate = row["creation_date"] || ""; + const lastUpdateDate = row["last_update_date"] || ""; + lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); + } + lines.push(""); + return lines.join("\n"); + } + // ── batch prefetch ──────────────────────────────────────────────────────── + /** + * Prefetch multiple files into the content cache with a single SQL query. + * Skips paths that are already cached, pending, or session-backed. + * After this call, subsequent readFile() calls for these paths hit cache. 
+ */ + async prefetch(paths) { + const uncached = []; + const uncachedSessions = []; + for (const raw of paths) { + const p22 = normPath(raw); + if (this.files.get(p22) !== null && this.files.get(p22) !== void 0) + continue; + if (this.pending.has(p22)) + continue; + if (!this.files.has(p22)) + continue; + if (this.sessionPaths.has(p22)) { + uncachedSessions.push(p22); + } else { + uncached.push(p22); } - const safeToDelete = toDelete.filter((fp) => !this.sessionPaths.has(fp)); - for (const fp of safeToDelete) - this.removeFromTree(fp); - this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); - if (safeToDelete.length > 0) { - const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); - await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); + } + for (let i11 = 0; i11 < uncached.length; i11 += PREFETCH_BATCH_SIZE) { + const chunk = uncached.slice(i11, i11 + PREFETCH_BATCH_SIZE); + const inList = chunk.map((p22) => `'${sqlStr(p22)}'`).join(", "); + const rows = await this.client.query(`SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})`); + for (const row of rows) { + const p22 = row["path"]; + const text = row["summary"] ?? 
""; + this.files.set(p22, Buffer.from(text, "utf-8")); } - } else { - await this.client.query(`DELETE FROM "${this.table}" WHERE path = '${sqlStr(p22)}'`); - this.removeFromTree(p22); } - } - async cp(src, dest, opts) { - const s10 = normPath(src), d15 = normPath(dest); - if (this.sessionPaths.has(d15)) - throw fsErr("EPERM", "session files are read-only", d15); - if (this.dirs.has(s10) && !this.files.has(s10)) { - if (!opts?.recursive) - throw fsErr("EISDIR", "is a directory", s10); - for (const fp of [...this.files.keys()].filter((k17) => k17 === s10 || k17.startsWith(s10 + "/"))) { - await this.writeFile(d15 + fp.slice(s10.length), await this.readFileBuffer(fp)); + if (!this.sessionsTable) + return; + for (let i11 = 0; i11 < uncachedSessions.length; i11 += PREFETCH_BATCH_SIZE) { + const chunk = uncachedSessions.slice(i11, i11 + PREFETCH_BATCH_SIZE); + const inList = chunk.map((p22) => `'${sqlStr(p22)}'`).join(", "); + const rows = await this.client.query(`SELECT path, message, creation_date FROM "${this.sessionsTable}" WHERE path IN (${inList}) ORDER BY path, creation_date ASC`); + const grouped = /* @__PURE__ */ new Map(); + for (const row of rows) { + const p22 = row["path"]; + const current = grouped.get(p22) ?? 
[]; + current.push(normalizeSessionMessage(p22, row["message"])); + grouped.set(p22, current); + } + for (const [p22, parts] of grouped) { + this.files.set(p22, Buffer.from(parts.join("\n"), "utf-8")); } - } else { - await this.writeFile(d15, await this.readFileBuffer(s10)); } } - async mv(src, dest) { - const s10 = normPath(src), d15 = normPath(dest); - if (this.sessionPaths.has(s10)) - throw fsErr("EPERM", "session files are read-only", s10); - if (this.sessionPaths.has(d15)) - throw fsErr("EPERM", "session files are read-only", d15); - await this.cp(src, dest, { recursive: true }); - await this.rm(src, { recursive: true, force: true }); + // ── IFileSystem: reads ──────────────────────────────────────────────────── + async readFileBuffer(path2) { + const p22 = normPath(path2); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + if (!this.files.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + const cached = this.files.get(p22); + if (cached !== null && cached !== void 0) + return cached; + const pend = this.pending.get(p22); + if (pend) { + const buf2 = Buffer.from(pend.contentText, "utf-8"); + this.files.set(p22, buf2); + return buf2; + } + if (this.sessionPaths.has(p22) && this.sessionsTable) { + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + if (rows2.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); + const buf2 = Buffer.from(text, "utf-8"); + this.files.set(p22, buf2); + return buf2; + } + const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); + if (rows.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const buf = Buffer.from(rows[0]["summary"] ?? 
"", "utf-8"); + this.files.set(p22, buf); + return buf; } - resolvePath(base, path2) { - if (path2.startsWith("/")) - return normPath(path2); - return normPath(posix.join(base, path2)); + async readFile(path2, _opts) { + const p22 = normPath(path2); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + if (p22 === "/index.md" && !this.files.has(p22)) { + const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); + if (realRows.length > 0 && realRows[0]["summary"]) { + const text2 = realRows[0]["summary"]; + const buf2 = Buffer.from(text2, "utf-8"); + this.files.set(p22, buf2); + return text2; + } + return this.generateVirtualIndex(); + } + if (!this.files.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + const cached = this.files.get(p22); + if (cached !== null && cached !== void 0) + return cached.toString("utf-8"); + const pend = this.pending.get(p22); + if (pend) + return pend.contentText; + if (this.sessionPaths.has(p22) && this.sessionsTable) { + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + if (rows2.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); + const buf2 = Buffer.from(text2, "utf-8"); + this.files.set(p22, buf2); + return text2; + } + const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); + if (rows.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text = rows[0]["summary"] ?? 
""; + const buf = Buffer.from(text, "utf-8"); + this.files.set(p22, buf); + return text; } - getAllPaths() { - return [.../* @__PURE__ */ new Set([...this.files.keys(), ...this.dirs.keys()])]; + // ── IFileSystem: writes ─────────────────────────────────────────────────── + /** Write a file with optional row-level metadata (project, description, dates). */ + async writeFileWithMeta(path2, content, meta) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); + const buf = Buffer.from(text, "utf-8"); + const mime = guessMime(basename4(p22)); + this.files.set(p22, buf); + this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); + this.addToTree(p22); + this.pending.set(p22, { + path: p22, + filename: basename4(p22), + contentText: text, + mimeType: mime, + sizeBytes: buf.length, + ...meta + }); + if (this.pending.size >= BATCH_SIZE) + await this.flush(); + else + this.scheduleFlush(); } -}; - -// node_modules/yargs-parser/build/lib/index.js -import { format } from "util"; -import { normalize, resolve as resolve4 } from "path"; - -// node_modules/yargs-parser/build/lib/string-utils.js -function camelCase(str) { - const isCamelCase = str !== str.toLowerCase() && str !== str.toUpperCase(); - if (!isCamelCase) { - str = str.toLowerCase(); + async writeFile(path2, content, _opts) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); + const buf = Buffer.from(text, "utf-8"); + const mime = guessMime(basename4(p22)); + this.files.set(p22, buf); + this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); + this.addToTree(p22); + this.pending.set(p22, { + path: p22, + filename: basename4(p22), + contentText: text, + mimeType: mime, + sizeBytes: buf.length + }); + if (this.pending.size >= BATCH_SIZE) + await this.flush(); + else + this.scheduleFlush(); } - if (str.indexOf("-") === -1 && str.indexOf("_") === -1) { - return str; - } else { - let camelcase = ""; - let nextChrUpper = false; - const leadingHyphens = str.match(/^-+/); - for (let i11 = leadingHyphens ? leadingHyphens[0].length : 0; i11 < str.length; i11++) { - let chr = str.charAt(i11); - if (nextChrUpper) { - nextChrUpper = false; - chr = chr.toUpperCase(); - } - if (i11 !== 0 && (chr === "-" || chr === "_")) { - nextChrUpper = true; - } else if (chr !== "-" && chr !== "_") { - camelcase += chr; + async appendFile(path2, content, opts) { + const p22 = normPath(path2); + const add = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.files.has(p22) || await this.exists(p22).catch(() => false)) { + const ts3 = (/* @__PURE__ */ new Date()).toISOString(); + await this.client.query(`UPDATE "${this.table}" SET summary = summary || E'${sqlStr(add)}', size_bytes = size_bytes + ${Buffer.byteLength(add, "utf-8")}, last_update_date = '${ts3}' WHERE path = '${sqlStr(p22)}'`); + this.files.set(p22, null); + const m26 = this.meta.get(p22); + if (m26) { + m26.size += Buffer.byteLength(add, "utf-8"); + m26.mtime = new Date(ts3); } - } - return camelcase; - } -} -function decamelize(str, joinString) { - const lowercase = str.toLowerCase(); - joinString = joinString || "-"; - let notCamelcase = ""; - for (let i11 = 0; i11 < str.length; i11++) { - const chrLower = lowercase.charAt(i11); - const chrString = str.charAt(i11); - if (chrLower !== chrString && i11 > 0) { - notCamelcase += `${joinString}${lowercase.charAt(i11)}`; } else { - notCamelcase += chrString; + await this.writeFile(p22, content, opts); + await this.flush(); } } - return notCamelcase; -} -function looksLikeNumber(x28) { - if (x28 === null || x28 === void 0) - return false; - if (typeof x28 === "number") - return true; - if (/^0x[0-9a-f]+$/i.test(x28)) - return true; - if (/^0[^.]/.test(x28)) - return false; - return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x28); -} - -// node_modules/yargs-parser/build/lib/tokenize-arg-string.js -function tokenizeArgString(argString) { - if (Array.isArray(argString)) { - return argString.map((e6) => typeof e6 !== "string" ? 
e6 + "" : e6); + // ── IFileSystem: metadata ───────────────────────────────────────────────── + async exists(path2) { + const p22 = normPath(path2); + if (p22 === "/index.md") + return true; + return this.files.has(p22) || this.dirs.has(p22); } - argString = argString.trim(); - let i11 = 0; - let prevC = null; - let c15 = null; - let opening = null; - const args = []; - for (let ii2 = 0; ii2 < argString.length; ii2++) { - prevC = c15; - c15 = argString.charAt(ii2); - if (c15 === " " && !opening) { - if (!(prevC === " ")) { - i11++; - } - continue; - } - if (c15 === opening) { - opening = null; - } else if ((c15 === "'" || c15 === '"') && !opening) { - opening = c15; + async stat(path2) { + const p22 = normPath(path2); + const isFile = this.files.has(p22); + const isDir = this.dirs.has(p22); + if (p22 === "/index.md" && !isFile && !isDir) { + return { + isFile: true, + isDirectory: false, + isSymbolicLink: false, + mode: 420, + size: 0, + mtime: /* @__PURE__ */ new Date() + }; } - if (!args[i11]) - args[i11] = ""; - args[i11] += c15; + if (!isFile && !isDir) + throw fsErr("ENOENT", "no such file or directory", p22); + const m26 = this.meta.get(p22); + return { + isFile: isFile && !isDir, + isDirectory: isDir, + isSymbolicLink: false, + mode: isDir ? 493 : 420, + size: m26?.size ?? 0, + mtime: m26?.mtime ?? 
/* @__PURE__ */ new Date() + }; } - return args; -} - -// node_modules/yargs-parser/build/lib/yargs-parser-types.js -var DefaultValuesForTypeKey; -(function(DefaultValuesForTypeKey2) { - DefaultValuesForTypeKey2["BOOLEAN"] = "boolean"; + async lstat(path2) { + return this.stat(path2); + } + async chmod(_path, _mode) { + } + async utimes(_path, _atime, _mtime) { + } + async symlink(_target, linkPath) { + throw fsErr("EPERM", "operation not permitted", linkPath); + } + async link(_src, destPath) { + throw fsErr("EPERM", "operation not permitted", destPath); + } + async readlink(path2) { + throw fsErr("EINVAL", "invalid argument", path2); + } + async realpath(path2) { + const p22 = normPath(path2); + if (p22 === "/index.md") + return p22; + if (!this.files.has(p22) && !this.dirs.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + return p22; + } + // ── IFileSystem: directories ────────────────────────────────────────────── + async mkdir(path2, opts) { + const p22 = normPath(path2); + if (this.files.has(p22)) + throw fsErr("EEXIST", "file exists", p22); + if (this.dirs.has(p22)) { + if (!opts?.recursive) + throw fsErr("EEXIST", "file exists", p22); + return; + } + if (!opts?.recursive) { + const parent2 = parentOf(p22); + if (!this.dirs.has(parent2)) + throw fsErr("ENOENT", "no such file or directory", parent2); + } + this.dirs.set(p22, /* @__PURE__ */ new Set()); + const parent = parentOf(p22); + if (!this.dirs.has(parent)) + this.dirs.set(parent, /* @__PURE__ */ new Set()); + this.dirs.get(parent).add(basename4(p22)); + } + async readdir(path2) { + const p22 = normPath(path2); + if (!this.dirs.has(p22)) + throw fsErr("ENOTDIR", "not a directory", p22); + const entries = [...this.dirs.get(p22) ?? 
[]]; + if (p22 === "/" && !entries.includes("index.md")) { + entries.push("index.md"); + } + return entries; + } + async readdirWithFileTypes(path2) { + const names = await this.readdir(path2); + const p22 = normPath(path2); + return names.map((name) => { + const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; + return { + name, + isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isDirectory: this.dirs.has(child), + isSymbolicLink: false + }; + }); + } + // ── IFileSystem: structural mutations ───────────────────────────────────── + async rm(path2, opts) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (!this.files.has(p22) && !this.dirs.has(p22)) { + if (opts?.force) + return; + throw fsErr("ENOENT", "no such file or directory", p22); + } + if (this.dirs.has(p22)) { + const children = this.dirs.get(p22) ?? /* @__PURE__ */ new Set(); + if (children.size > 0 && !opts?.recursive) + throw fsErr("ENOTEMPTY", "directory not empty", p22); + const toDelete = []; + const stack = [p22]; + while (stack.length) { + const cur = stack.pop(); + for (const child of [...this.dirs.get(cur) ?? []]) { + const childPath = cur === "/" ? 
`/${child}` : `${cur}/${child}`; + if (this.files.has(childPath)) + toDelete.push(childPath); + if (this.dirs.has(childPath)) + stack.push(childPath); + } + } + const safeToDelete = toDelete.filter((fp) => !this.sessionPaths.has(fp)); + for (const fp of safeToDelete) + this.removeFromTree(fp); + this.dirs.delete(p22); + this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + if (safeToDelete.length > 0) { + const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); + await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); + } + } else { + await this.client.query(`DELETE FROM "${this.table}" WHERE path = '${sqlStr(p22)}'`); + this.removeFromTree(p22); + } + } + async cp(src, dest, opts) { + const s10 = normPath(src), d15 = normPath(dest); + if (this.sessionPaths.has(d15)) + throw fsErr("EPERM", "session files are read-only", d15); + if (this.dirs.has(s10) && !this.files.has(s10)) { + if (!opts?.recursive) + throw fsErr("EISDIR", "is a directory", s10); + for (const fp of [...this.files.keys()].filter((k17) => k17 === s10 || k17.startsWith(s10 + "/"))) { + await this.writeFile(d15 + fp.slice(s10.length), await this.readFileBuffer(fp)); + } + } else { + await this.writeFile(d15, await this.readFileBuffer(s10)); + } + } + async mv(src, dest) { + const s10 = normPath(src), d15 = normPath(dest); + if (this.sessionPaths.has(s10)) + throw fsErr("EPERM", "session files are read-only", s10); + if (this.sessionPaths.has(d15)) + throw fsErr("EPERM", "session files are read-only", d15); + await this.cp(src, dest, { recursive: true }); + await this.rm(src, { recursive: true, force: true }); + } + resolvePath(base, path2) { + if (path2.startsWith("/")) + return normPath(path2); + return normPath(posix.join(base, path2)); + } + getAllPaths() { + return [.../* @__PURE__ */ new Set([...this.files.keys(), ...this.dirs.keys()])]; + } +}; + +// node_modules/yargs-parser/build/lib/index.js +import { format } from "util"; +import { normalize, resolve 
as resolve4 } from "path"; + +// node_modules/yargs-parser/build/lib/string-utils.js +function camelCase2(str) { + const isCamelCase = str !== str.toLowerCase() && str !== str.toUpperCase(); + if (!isCamelCase) { + str = str.toLowerCase(); + } + if (str.indexOf("-") === -1 && str.indexOf("_") === -1) { + return str; + } else { + let camelcase = ""; + let nextChrUpper = false; + const leadingHyphens = str.match(/^-+/); + for (let i11 = leadingHyphens ? leadingHyphens[0].length : 0; i11 < str.length; i11++) { + let chr = str.charAt(i11); + if (nextChrUpper) { + nextChrUpper = false; + chr = chr.toUpperCase(); + } + if (i11 !== 0 && (chr === "-" || chr === "_")) { + nextChrUpper = true; + } else if (chr !== "-" && chr !== "_") { + camelcase += chr; + } + } + return camelcase; + } +} +function decamelize(str, joinString) { + const lowercase = str.toLowerCase(); + joinString = joinString || "-"; + let notCamelcase = ""; + for (let i11 = 0; i11 < str.length; i11++) { + const chrLower = lowercase.charAt(i11); + const chrString = str.charAt(i11); + if (chrLower !== chrString && i11 > 0) { + notCamelcase += `${joinString}${lowercase.charAt(i11)}`; + } else { + notCamelcase += chrString; + } + } + return notCamelcase; +} +function looksLikeNumber(x28) { + if (x28 === null || x28 === void 0) + return false; + if (typeof x28 === "number") + return true; + if (/^0x[0-9a-f]+$/i.test(x28)) + return true; + if (/^0[^.]/.test(x28)) + return false; + return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x28); +} + +// node_modules/yargs-parser/build/lib/tokenize-arg-string.js +function tokenizeArgString(argString) { + if (Array.isArray(argString)) { + return argString.map((e6) => typeof e6 !== "string" ? 
e6 + "" : e6); + } + argString = argString.trim(); + let i11 = 0; + let prevC = null; + let c15 = null; + let opening = null; + const args = []; + for (let ii2 = 0; ii2 < argString.length; ii2++) { + prevC = c15; + c15 = argString.charAt(ii2); + if (c15 === " " && !opening) { + if (!(prevC === " ")) { + i11++; + } + continue; + } + if (c15 === opening) { + opening = null; + } else if ((c15 === "'" || c15 === '"') && !opening) { + opening = c15; + } + if (!args[i11]) + args[i11] = ""; + args[i11] += c15; + } + return args; +} + +// node_modules/yargs-parser/build/lib/yargs-parser-types.js +var DefaultValuesForTypeKey; +(function(DefaultValuesForTypeKey2) { + DefaultValuesForTypeKey2["BOOLEAN"] = "boolean"; DefaultValuesForTypeKey2["STRING"] = "string"; DefaultValuesForTypeKey2["NUMBER"] = "number"; DefaultValuesForTypeKey2["ARRAY"] = "array"; @@ -67983,7 +68490,7 @@ var YargsParser = class { ; [].concat(...Object.keys(aliases).map((k17) => aliases[k17])).forEach((alias) => { if (configuration["camel-case-expansion"] && alias.includes("-")) { - delete argv[alias.split(".").map((prop) => camelCase(prop)).join(".")]; + delete argv[alias.split(".").map((prop) => camelCase2(prop)).join(".")]; } delete argv[alias]; }); @@ -68065,7 +68572,7 @@ var YargsParser = class { function setArg(key, val, shouldStripQuotes = inputIsString) { if (/-/.test(key) && configuration["camel-case-expansion"]) { const alias = key.split(".").map(function(prop) { - return camelCase(prop); + return camelCase2(prop); }).join("."); addNewAlias(key, alias); } @@ -68213,7 +68720,7 @@ var YargsParser = class { if (i11 === 0) { key = key.substring(prefix.length); } - return camelCase(key); + return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { setArg(keys.join("."), env2[envVar]); @@ -68333,7 +68840,7 @@ var YargsParser = class { flags.aliases[key] = [].concat(aliases[key] || []); flags.aliases[key].concat(key).forEach(function(x28) { 
if (/-/.test(x28) && configuration["camel-case-expansion"]) { - const c15 = camelCase(x28); + const c15 = camelCase2(x28); if (c15 !== key && flags.aliases[key].indexOf(c15) === -1) { flags.aliases[key].push(c15); newAliases[c15] = true; @@ -68397,442 +68904,167 @@ var YargsParser = class { return hasAllFlags; } function isUnknownOptionAsArg(arg) { - return configuration["unknown-options-as-args"] && isUnknownOption(arg); - } - function isUnknownOption(arg) { - arg = arg.replace(/^-{3,}/, "--"); - if (arg.match(negative)) { - return false; - } - if (hasAllShortFlags(arg)) { - return false; - } - const flagWithEquals = /^-+([^=]+?)=[\s\S]*$/; - const normalFlag = /^-+([^=]+?)$/; - const flagEndingInHyphen = /^-+([^=]+?)-$/; - const flagEndingInDigits = /^-+([^=]+?\d+)$/; - const flagEndingInNonWordCharacters = /^-+([^=]+?)\W+.*$/; - return !hasFlagsMatching(arg, flagWithEquals, negatedBoolean, normalFlag, flagEndingInHyphen, flagEndingInDigits, flagEndingInNonWordCharacters); - } - function defaultValue(key) { - if (!checkAllAliases(key, flags.bools) && !checkAllAliases(key, flags.counts) && `${key}` in defaults2) { - return defaults2[key]; - } else { - return defaultForType(guessType(key)); - } - } - function defaultForType(type) { - const def = { - [DefaultValuesForTypeKey.BOOLEAN]: true, - [DefaultValuesForTypeKey.STRING]: "", - [DefaultValuesForTypeKey.NUMBER]: void 0, - [DefaultValuesForTypeKey.ARRAY]: [] - }; - return def[type]; - } - function guessType(key) { - let type = DefaultValuesForTypeKey.BOOLEAN; - if (checkAllAliases(key, flags.strings)) - type = DefaultValuesForTypeKey.STRING; - else if (checkAllAliases(key, flags.numbers)) - type = DefaultValuesForTypeKey.NUMBER; - else if (checkAllAliases(key, flags.bools)) - type = DefaultValuesForTypeKey.BOOLEAN; - else if (checkAllAliases(key, flags.arrays)) - type = DefaultValuesForTypeKey.ARRAY; - return type; - } - function isUndefined(num) { - return num === void 0; - } - function checkConfiguration() { - 
Object.keys(flags.counts).find((key) => { - if (checkAllAliases(key, flags.arrays)) { - error = Error(__("Invalid configuration: %s, opts.count excludes opts.array.", key)); - return true; - } else if (checkAllAliases(key, flags.nargs)) { - error = Error(__("Invalid configuration: %s, opts.count excludes opts.narg.", key)); - return true; - } - return false; - }); - } - return { - aliases: Object.assign({}, flags.aliases), - argv: Object.assign(argvReturn, argv), - configuration, - defaulted: Object.assign({}, defaulted), - error, - newAliases: Object.assign({}, newAliases) - }; - } -}; -function combineAliases(aliases) { - const aliasArrays = []; - const combined = /* @__PURE__ */ Object.create(null); - let change = true; - Object.keys(aliases).forEach(function(key) { - aliasArrays.push([].concat(aliases[key], key)); - }); - while (change) { - change = false; - for (let i11 = 0; i11 < aliasArrays.length; i11++) { - for (let ii2 = i11 + 1; ii2 < aliasArrays.length; ii2++) { - const intersect = aliasArrays[i11].filter(function(v27) { - return aliasArrays[ii2].indexOf(v27) !== -1; - }); - if (intersect.length) { - aliasArrays[i11] = aliasArrays[i11].concat(aliasArrays[ii2]); - aliasArrays.splice(ii2, 1); - change = true; - break; - } - } - } - } - aliasArrays.forEach(function(aliasArray) { - aliasArray = aliasArray.filter(function(v27, i11, self2) { - return self2.indexOf(v27) === i11; - }); - const lastAlias = aliasArray.pop(); - if (lastAlias !== void 0 && typeof lastAlias === "string") { - combined[lastAlias] = aliasArray; - } - }); - return combined; -} -function increment(orig) { - return orig !== void 0 ? orig + 1 : 1; -} -function sanitizeKey(key) { - if (key === "__proto__") - return "___proto___"; - return key; -} -function stripQuotes(val) { - return typeof val === "string" && (val[0] === "'" || val[0] === '"') && val[val.length - 1] === val[0] ? 
val.substring(1, val.length - 1) : val; -} - -// node_modules/yargs-parser/build/lib/index.js -import { readFileSync as readFileSync2 } from "fs"; -import { createRequire } from "node:module"; -var _a3; -var _b; -var _c; -var minNodeVersion = process && process.env && process.env.YARGS_MIN_NODE_VERSION ? Number(process.env.YARGS_MIN_NODE_VERSION) : 20; -var nodeVersion = (_b = (_a3 = process === null || process === void 0 ? void 0 : process.versions) === null || _a3 === void 0 ? void 0 : _a3.node) !== null && _b !== void 0 ? _b : (_c = process === null || process === void 0 ? void 0 : process.version) === null || _c === void 0 ? void 0 : _c.slice(1); -if (nodeVersion) { - const major = Number(nodeVersion.match(/^([^.]+)/)[1]); - if (major < minNodeVersion) { - throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); - } -} -var env = process ? process.env : {}; -var require2 = createRequire ? 
createRequire(import.meta.url) : void 0; -var parser = new YargsParser({ - cwd: process.cwd, - env: () => { - return env; - }, - format, - normalize, - resolve: resolve4, - require: (path2) => { - if (typeof require2 !== "undefined") { - return require2(path2); - } else if (path2.match(/\.json$/)) { - return JSON.parse(readFileSync2(path2, "utf8")); - } else { - throw Error("only .json config files are supported in ESM"); - } - } -}); -var yargsParser = function Parser(args, opts) { - const result = parser.parse(args.slice(), opts); - return result.argv; -}; -yargsParser.detailed = function(args, opts) { - return parser.parse(args.slice(), opts); -}; -yargsParser.camelCase = camelCase; -yargsParser.decamelize = decamelize; -yargsParser.looksLikeNumber = looksLikeNumber; -var lib_default = yargsParser; - -// dist/src/shell/grep-core.js -var TOOL_INPUT_FIELDS = [ - "command", - "file_path", - "path", - "pattern", - "prompt", - "subagent_type", - "query", - "url", - "notebook_path", - "old_string", - "new_string", - "content", - "skill", - "args", - "taskId", - "status", - "subject", - "description", - "to", - "message", - "summary", - "max_results" -]; -var TOOL_RESPONSE_DROP = /* @__PURE__ */ new Set([ - // Note: `stderr` is intentionally NOT in this set. The `stdout` high-signal - // branch below already de-dupes it for the common case (appends as suffix - // when non-empty). If a tool response has ONLY `stderr` and no `stdout` - // (hard-failure on some tools), the generic cleanup preserves it so the - // error message reaches Claude instead of collapsing to `[ok]`. 
- "interrupted", - "isImage", - "noOutputExpected", - "type", - "structuredPatch", - "userModified", - "originalFile", - "replaceAll", - "totalDurationMs", - "totalTokens", - "totalToolUseCount", - "usage", - "toolStats", - "durationMs", - "durationSeconds", - "bytes", - "code", - "codeText", - "agentId", - "agentType", - "verificationNudgeNeeded", - "numLines", - "numFiles", - "truncated", - "statusChange", - "updatedFields", - "isAgent", - "success" -]); -function maybeParseJson(v27) { - if (typeof v27 !== "string") - return v27; - const s10 = v27.trim(); - if (s10[0] !== "{" && s10[0] !== "[") - return v27; - try { - return JSON.parse(s10); - } catch { - return v27; - } -} -function snakeCase(k17) { - return k17.replace(/([A-Z])/g, "_$1").toLowerCase(); -} -function camelCase2(k17) { - return k17.replace(/_([a-z])/g, (_16, c15) => c15.toUpperCase()); -} -function formatToolInput(raw) { - const p22 = maybeParseJson(raw); - if (typeof p22 !== "object" || p22 === null) - return String(p22 ?? ""); - const parts = []; - for (const k17 of TOOL_INPUT_FIELDS) { - if (p22[k17] === void 0) - continue; - const v27 = p22[k17]; - parts.push(`${k17}: ${typeof v27 === "string" ? v27 : JSON.stringify(v27)}`); - } - for (const k17 of ["glob", "output_mode", "limit", "offset"]) { - if (p22[k17] !== void 0) - parts.push(`${k17}: ${p22[k17]}`); - } - return parts.length ? parts.join("\n") : JSON.stringify(p22); -} -function formatToolResponse(raw, inp, toolName) { - const r10 = maybeParseJson(raw); - if (typeof r10 !== "object" || r10 === null) - return String(r10 ?? ""); - if (toolName === "Edit" || toolName === "Write" || toolName === "MultiEdit") { - return r10.filePath ? `[wrote ${r10.filePath}]` : "[ok]"; - } - if (typeof r10.stdout === "string") { - const stderr = r10.stderr; - return r10.stdout + (stderr ? 
` -stderr: ${stderr}` : ""); - } - if (typeof r10.content === "string") - return r10.content; - if (r10.file && typeof r10.file === "object") { - const f11 = r10.file; - if (typeof f11.content === "string") - return `[${f11.filePath ?? ""}] -${f11.content}`; - if (typeof f11.base64 === "string") - return `[binary ${f11.filePath ?? ""}: ${f11.base64.length} base64 chars]`; - } - if (Array.isArray(r10.filenames)) - return r10.filenames.join("\n"); - if (Array.isArray(r10.matches)) { - return r10.matches.map((m26) => typeof m26 === "string" ? m26 : JSON.stringify(m26)).join("\n"); - } - if (Array.isArray(r10.results)) { - return r10.results.map((x28) => typeof x28 === "string" ? x28 : x28?.title ?? x28?.url ?? JSON.stringify(x28)).join("\n"); - } - const inpObj = maybeParseJson(inp); - const kept = {}; - for (const [k17, v27] of Object.entries(r10)) { - if (TOOL_RESPONSE_DROP.has(k17)) - continue; - if (v27 === "" || v27 === false || v27 == null) - continue; - if (typeof inpObj === "object" && inpObj) { - const inObj = inpObj; - if (k17 in inObj && JSON.stringify(inObj[k17]) === JSON.stringify(v27)) - continue; - const snake = snakeCase(k17); - if (snake in inObj && JSON.stringify(inObj[snake]) === JSON.stringify(v27)) - continue; - const camel = camelCase2(k17); - if (camel in inObj && JSON.stringify(inObj[camel]) === JSON.stringify(v27)) - continue; + return configuration["unknown-options-as-args"] && isUnknownOption(arg); } - kept[k17] = v27; - } - return Object.keys(kept).length ? JSON.stringify(kept) : "[ok]"; -} -function formatToolCall(obj) { - return `[tool:${obj?.tool_name ?? 
"?"}] -input: ${formatToolInput(obj?.tool_input)} -response: ${formatToolResponse(obj?.tool_response, obj?.tool_input, obj?.tool_name)}`; -} -function normalizeContent(path2, raw) { - if (!path2.includes("/sessions/")) - return raw; - if (!raw || raw[0] !== "{") - return raw; - let obj; - try { - obj = JSON.parse(raw); - } catch { - return raw; - } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); + function isUnknownOption(arg) { + arg = arg.replace(/^-{3,}/, "--"); + if (arg.match(negative)) { + return false; + } + if (hasAllShortFlags(arg)) { + return false; + } + const flagWithEquals = /^-+([^=]+?)=[\s\S]*$/; + const normalFlag = /^-+([^=]+?)$/; + const flagEndingInHyphen = /^-+([^=]+?)-$/; + const flagEndingInDigits = /^-+([^=]+?\d+)$/; + const flagEndingInNonWordCharacters = /^-+([^=]+?)\W+.*$/; + return !hasFlagsMatching(arg, flagWithEquals, negatedBoolean, normalFlag, flagEndingInHyphen, flagEndingInDigits, flagEndingInNonWordCharacters); } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? 
out2 : raw; + function defaultValue(key) { + if (!checkAllAliases(key, flags.bools) && !checkAllAliases(key, flags.counts) && `${key}` in defaults2) { + return defaults2[key]; + } else { + return defaultForType(guessType(key)); + } + } + function defaultForType(type) { + const def = { + [DefaultValuesForTypeKey.BOOLEAN]: true, + [DefaultValuesForTypeKey.STRING]: "", + [DefaultValuesForTypeKey.NUMBER]: void 0, + [DefaultValuesForTypeKey.ARRAY]: [] + }; + return def[type]; + } + function guessType(key) { + let type = DefaultValuesForTypeKey.BOOLEAN; + if (checkAllAliases(key, flags.strings)) + type = DefaultValuesForTypeKey.STRING; + else if (checkAllAliases(key, flags.numbers)) + type = DefaultValuesForTypeKey.NUMBER; + else if (checkAllAliases(key, flags.bools)) + type = DefaultValuesForTypeKey.BOOLEAN; + else if (checkAllAliases(key, flags.arrays)) + type = DefaultValuesForTypeKey.ARRAY; + return type; + } + function isUndefined(num) { + return num === void 0; + } + function checkConfiguration() { + Object.keys(flags.counts).find((key) => { + if (checkAllAliases(key, flags.arrays)) { + error = Error(__("Invalid configuration: %s, opts.count excludes opts.array.", key)); + return true; + } else if (checkAllAliases(key, flags.nargs)) { + error = Error(__("Invalid configuration: %s, opts.count excludes opts.narg.", key)); + return true; + } + return false; + }); + } + return { + aliases: Object.assign({}, flags.aliases), + argv: Object.assign(argvReturn, argv), + configuration, + defaulted: Object.assign({}, defaulted), + error, + newAliases: Object.assign({}, newAliases) + }; } - const stripRecalled = (t6) => { - const i11 = t6.indexOf(""); - if (i11 === -1) - return t6; - const j14 = t6.lastIndexOf(""); - if (j14 === -1 || j14 < i11) - return t6; - const head = t6.slice(0, i11); - const tail = t6.slice(j14 + "".length); - return (head + tail).replace(/^\s+/, "").replace(/\n{3,}/g, "\n\n"); - }; - let out = null; - if (obj.type === "user_message") { - out = `[user] 
${stripRecalled(String(obj.content ?? ""))}`; - } else if (obj.type === "assistant_message") { - const agent = obj.agent_type ? ` (agent=${obj.agent_type})` : ""; - out = `[assistant${agent}] ${stripRecalled(String(obj.content ?? ""))}`; - } else if (obj.type === "tool_call") { - out = formatToolCall(obj); +}; +function combineAliases(aliases) { + const aliasArrays = []; + const combined = /* @__PURE__ */ Object.create(null); + let change = true; + Object.keys(aliases).forEach(function(key) { + aliasArrays.push([].concat(aliases[key], key)); + }); + while (change) { + change = false; + for (let i11 = 0; i11 < aliasArrays.length; i11++) { + for (let ii2 = i11 + 1; ii2 < aliasArrays.length; ii2++) { + const intersect = aliasArrays[i11].filter(function(v27) { + return aliasArrays[ii2].indexOf(v27) !== -1; + }); + if (intersect.length) { + aliasArrays[i11] = aliasArrays[i11].concat(aliasArrays[ii2]); + aliasArrays.splice(ii2, 1); + change = true; + break; + } + } + } } - if (out === null) - return raw; - const trimmed = out.trim(); - if (!trimmed || trimmed === "[user]" || trimmed === "[assistant]" || /^\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}$/.test(trimmed)) - return raw; - return out; + aliasArrays.forEach(function(aliasArray) { + aliasArray = aliasArray.filter(function(v27, i11, self2) { + return self2.indexOf(v27) === i11; + }); + const lastAlias = aliasArray.pop(); + if (lastAlias !== void 0 && typeof lastAlias === "string") { + combined[lastAlias] = aliasArray; + } + }); + return combined; } -async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern } = opts; - const limit = opts.limit ?? 100; - const memFilter = contentScanOnly ? "" : ` AND summary::text ${likeOp} '%${escapedPattern}%'`; - const sessFilter = contentScanOnly ? 
"" : ` AND message::text ${likeOp} '%${escapedPattern}%'`; - const memQuery = `SELECT path, summary::text AS content FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const [memRows, sessRows] = await Promise.all([ - api.query(memQuery).catch(() => []), - api.query(sessQuery).catch(() => []) - ]); - const rows = []; - for (const r10 of memRows) - rows.push({ path: String(r10.path), content: String(r10.content ?? "") }); - for (const r10 of sessRows) - rows.push({ path: String(r10.path), content: String(r10.content ?? "") }); - return rows; +function increment(orig) { + return orig !== void 0 ? orig + 1 : 1; } -function buildPathFilter(targetPath) { - if (!targetPath || targetPath === "/") - return ""; - const clean = targetPath.replace(/\/+$/, ""); - return ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; +function sanitizeKey(key) { + if (key === "__proto__") + return "___proto___"; + return key; } -function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; - if (params.wordMatch) - reStr = `\\b${reStr}\\b`; - try { - return new RegExp(reStr, params.ignoreCase ? "i" : ""); - } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); +function stripQuotes(val) { + return typeof val === "string" && (val[0] === "'" || val[0] === '"') && val[val.length - 1] === val[0] ? val.substring(1, val.length - 1) : val; +} + +// node_modules/yargs-parser/build/lib/index.js +import { readFileSync as readFileSync3 } from "fs"; +import { createRequire } from "node:module"; +var _a3; +var _b; +var _c; +var minNodeVersion = process && process.env && process.env.YARGS_MIN_NODE_VERSION ? 
Number(process.env.YARGS_MIN_NODE_VERSION) : 20; +var nodeVersion = (_b = (_a3 = process === null || process === void 0 ? void 0 : process.versions) === null || _a3 === void 0 ? void 0 : _a3.node) !== null && _b !== void 0 ? _b : (_c = process === null || process === void 0 ? void 0 : process.version) === null || _c === void 0 ? void 0 : _c.slice(1); +if (nodeVersion) { + const major = Number(nodeVersion.match(/^([^.]+)/)[1]); + if (major < minNodeVersion) { + throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -function refineGrepMatches(rows, params, forceMultiFilePrefix) { - const re9 = compileGrepRegex(params); - const multi = forceMultiFilePrefix ?? rows.length > 1; - const output = []; - for (const row of rows) { - if (!row.content) - continue; - const lines = row.content.split("\n"); - const matched = []; - for (let i11 = 0; i11 < lines.length; i11++) { - const hit = re9.test(lines[i11]); - if (hit !== !!params.invertMatch) { - if (params.filesOnly) { - output.push(row.path); - break; - } - const prefix = multi ? `${row.path}:` : ""; - const ln3 = params.lineNumber ? `${i11 + 1}:` : ""; - matched.push(`${prefix}${ln3}${lines[i11]}`); - } - } - if (!params.filesOnly) { - if (params.countOnly) { - output.push(`${multi ? `${row.path}:` : ""}${matched.length}`); - } else { - output.push(...matched); - } +var env = process ? process.env : {}; +var require2 = createRequire ? 
createRequire(import.meta.url) : void 0; +var parser = new YargsParser({ + cwd: process.cwd, + env: () => { + return env; + }, + format, + normalize, + resolve: resolve4, + require: (path2) => { + if (typeof require2 !== "undefined") { + return require2(path2); + } else if (path2.match(/\.json$/)) { + return JSON.parse(readFileSync3(path2, "utf8")); + } else { + throw Error("only .json config files are supported in ESM"); } } - return output; -} +}); +var yargsParser = function Parser(args, opts) { + const result = parser.parse(args.slice(), opts); + return result.argv; +}; +yargsParser.detailed = function(args, opts) { + return parser.parse(args.slice(), opts); +}; +yargsParser.camelCase = camelCase2; +yargsParser.decamelize = decamelize; +yargsParser.looksLikeNumber = looksLikeNumber; +var lib_default = yargsParser; // dist/src/shell/grep-interceptor.js var MAX_FALLBACK_CANDIDATES = 500; @@ -68876,23 +69108,18 @@ function createGrepCommand(client, fs3, table, sessionsTable) { filesOnly: Boolean(parsed.l || parsed["files-with-matches"]), countOnly: Boolean(parsed.c || parsed["count"]) }; - const likeOp = matchParams.ignoreCase ? "ILIKE" : "LIKE"; - const hasRegexMeta = !matchParams.fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const escapedPattern = sqlLike(pattern); let rows = []; try { - const perTarget = await Promise.race([ - Promise.all(targets.map((t6) => searchDeeplakeTables(client, table, sessionsTable ?? "sessions", { - pathFilter: buildPathFilter(t6), - contentScanOnly: hasRegexMeta, - likeOp, - escapedPattern, - limit: 100 - }))), + const searchOptions = { + ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), + pathFilter: buildPathFilterForTargets(targets), + limit: 100 + }; + const queryRows = await Promise.race([ + searchDeeplakeTables(client, table, sessionsTable ?? 
"sessions", searchOptions), new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3)) ]); - for (const batch of perTarget) - rows.push(...batch); + rows.push(...queryRows); } catch { rows = []; } @@ -68920,6 +69147,13 @@ function createGrepCommand(client, fs3, table, sessionsTable) { // dist/src/shell/deeplake-shell.js async function main() { + const isOneShot = process.argv.includes("-c"); + if (isOneShot) { + delete process.env["HIVEMIND_TRACE_SQL"]; + delete process.env["DEEPLAKE_TRACE_SQL"]; + delete process.env["HIVEMIND_DEBUG"]; + delete process.env["DEEPLAKE_DEBUG"]; + } const config = loadConfig(); if (!config) { process.stderr.write("Deeplake credentials not found.\nSet HIVEMIND_TOKEN + HIVEMIND_ORG_ID in environment, or create ~/.deeplake/credentials.json\n"); @@ -68928,7 +69162,6 @@ async function main() { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; const mount = process.env["HIVEMIND_MOUNT"] ?? "/"; - const isOneShot = process.argv.includes("-c"); const client = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); if (!isOneShot) { process.stderr.write(`Connecting to deeplake://${config.workspaceId}/${table} ... 
diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts new file mode 100644 index 0000000..3bb90a7 --- /dev/null +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -0,0 +1,457 @@ +import { describe, expect, it, vi } from "vitest"; +import { + executeCompiledBashCommand, + expandBraceToken, + hasUnsupportedRedirection, + parseCompiledBashCommand, + parseCompiledSegment, + splitTopLevel, + stripAllowedModifiers, + tokenizeShellWords, +} from "../../src/hooks/bash-command-compiler.js"; + +describe("bash-command-compiler parsing", () => { + it("splits top-level sequences while respecting quotes", () => { + expect(splitTopLevel("cat /a && echo 'x && y' ; ls /b", ["&&", ";"])).toEqual([ + "cat /a", + "echo 'x && y'", + "ls /b", + ]); + expect(splitTopLevel(" && echo hi ; ", ["&&", ";"])).toEqual(["echo hi"]); + }); + + it("returns null on unterminated quotes", () => { + expect(splitTopLevel("echo 'oops", ["&&"])).toBeNull(); + expect(tokenizeShellWords("echo \"oops")).toBeNull(); + }); + + it("tokenizes shell words with quotes and escapes", () => { + expect(tokenizeShellWords("echo \"hello world\" 'again' plain")).toEqual([ + "echo", + "hello world", + "again", + "plain", + ]); + expect(tokenizeShellWords("echo \"hello \\\"world\\\"\"")).toEqual([ + "echo", + "hello \"world\"", + ]); + }); + + it("expands numeric and comma brace expressions", () => { + expect(expandBraceToken("/part_{1..3}.md")).toEqual([ + "/part_1.md", + "/part_2.md", + "/part_3.md", + ]); + expect(expandBraceToken("/file_{a,b}.md")).toEqual([ + "/file_a.md", + "/file_b.md", + ]); + expect(expandBraceToken("/plain.md")).toEqual(["/plain.md"]); + expect(expandBraceToken("/part_{3..1}.md")).toEqual([ + "/part_3.md", + "/part_2.md", + "/part_1.md", + ]); + }); + + it("strips allowed stderr modifiers and detects unsupported redirection", () => { + expect(stripAllowedModifiers("cat /a 2>/dev/null")).toEqual({ + clean: "cat /a", + ignoreMissing: 
true, + }); + expect(stripAllowedModifiers("cat /a 2>&1 | head -2")).toEqual({ + clean: "cat /a | head -2", + ignoreMissing: false, + }); + expect(hasUnsupportedRedirection("echo ok > /x")).toBe(true); + expect(hasUnsupportedRedirection("echo '>'")).toBe(false); + }); + + it("parses supported read-only segments", () => { + expect(parseCompiledSegment("echo ---")).toEqual({ kind: "echo", text: "---" }); + expect(parseCompiledSegment("cat /a /b | head -2")).toEqual({ + kind: "cat", + paths: ["/a", "/b"], + lineLimit: 2, + fromEnd: false, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("head /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 10, + fromEnd: false, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("head -2 /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 2, + fromEnd: false, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("tail -n 3 /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 3, + fromEnd: true, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("tail -2 /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 2, + fromEnd: true, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("head -n 2 /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 2, + fromEnd: false, + countLines: false, + ignoreMissing: false, + }); + expect(parseCompiledSegment("wc -l /a")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 0, + fromEnd: false, + countLines: true, + ignoreMissing: false, + }); + expect(parseCompiledSegment("cat /a | wc -l")).toEqual({ + kind: "cat", + paths: ["/a"], + lineLimit: 0, + fromEnd: false, + countLines: true, + ignoreMissing: false, + }); + expect(parseCompiledSegment("ls -la /summaries/{a,b}")).toEqual({ + kind: "ls", + dirs: ["/summaries/a", "/summaries/b"], + longFormat: true, + }); + expect(parseCompiledSegment("ls -l")).toEqual({ + 
kind: "ls", + dirs: ["/"], + longFormat: true, + }); + expect(parseCompiledSegment("ls -a")).toEqual({ + kind: "ls", + dirs: ["/"], + longFormat: false, + }); + expect(parseCompiledSegment("find /summaries -name '*.md' | wc -l")).toEqual({ + kind: "find", + dir: "/summaries", + pattern: "*.md", + countOnly: true, + }); + expect(parseCompiledSegment("grep foo /summaries | head -5")).toEqual({ + kind: "grep", + params: { + pattern: "foo", + targetPath: "/summaries", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 5, + }); + expect(parseCompiledSegment("grep foo /summaries | head")).toEqual({ + kind: "grep", + params: { + pattern: "foo", + targetPath: "/summaries", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 10, + }); + expect(parseCompiledSegment("grep foo /summaries")).toEqual({ + kind: "grep", + params: { + pattern: "foo", + targetPath: "/summaries", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 0, + }); + expect(parseCompiledSegment("find /summaries -type f -name '*.md' -o -name '*.json' | xargs grep -l 'launch' | head -5")).toEqual({ + kind: "find_grep", + dir: "/summaries", + patterns: ["*.md", "*.json"], + params: { + pattern: "launch", + targetPath: "/", + ignoreCase: false, + wordMatch: false, + filesOnly: true, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 5, + }); + expect(parseCompiledSegment("find /summaries -type f -name '*.md' | xargs -r grep -l launch | head -1")).toEqual({ + kind: "find_grep", + dir: "/summaries", + patterns: ["*.md"], + params: { + pattern: "launch", + targetPath: "/", + ignoreCase: false, + wordMatch: false, + filesOnly: true, + 
countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 1, + }); + }); + + it("rejects unsupported segments and command shapes", () => { + expect(parseCompiledSegment("cat")).toBeNull(); + expect(parseCompiledSegment("echo ok > /x")).toBeNull(); + expect(parseCompiledSegment("cat /a | jq '.x'")).toBeNull(); + expect(parseCompiledSegment("cat /a /b | wc -l")).toBeNull(); + expect(parseCompiledSegment("cat /a | head -n nope")).toBeNull(); + expect(parseCompiledSegment("head -n nope /a")).toBeNull(); + expect(parseCompiledSegment("head -n 2")).toBeNull(); + expect(parseCompiledSegment("wc -l")).toBeNull(); + expect(parseCompiledSegment("find")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' | sort")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' -o -name '*.json'")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' -o -name '*.json' | wc -l")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' | xargs")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' | xargs grep -l foo | head nope")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' | xargs -z grep -l foo")).toBeNull(); + expect(parseCompiledSegment("find /summaries -name '*.md' | xargs grep -l foo | tail -2")).toBeNull(); + expect(parseCompiledSegment("grep foo /a | tail -2")).toBeNull(); + expect(parseCompiledSegment("grep foo /a | head nope")).toBeNull(); + expect(parseCompiledBashCommand("cat /a || cat /b")).toBeNull(); + expect(parseCompiledBashCommand("cat /a && echo ok > /x")).toBeNull(); + }); +}); + +describe("bash-command-compiler execution", () => { + it("batches exact reads and directory listings across compound commands", async () => { + const readVirtualPathContentsFn = vi.fn(async () => new Map([ + ["/a.md", "line1\nline2\nline3\n"], + ["/b.md", 
"tail1\ntail2\n"], + ])); + const listVirtualPathRowsForDirsFn = vi.fn(async () => new Map([ + ["/summaries/a", [{ path: "/summaries/a/group/file1.md", size_bytes: 10 }]], + ["/summaries/b", [{ path: "/summaries/b/file2.md", size_bytes: 20 }]], + ])); + const findVirtualPathsFn = vi.fn(async () => ["/summaries/a/file1.md", "/summaries/a/file2.md"]); + const handleGrepDirectFn = vi.fn(async () => "/summaries/a/file1.md:needle\n/summaries/a/file2.md:needle"); + + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "cat /{a,b}.md | head -3 && echo --- && ls -la /summaries/{a,b} && find /summaries/a -name '*.md' | wc -l && grep needle /summaries/a | head -1", + { + readVirtualPathContentsFn: readVirtualPathContentsFn as any, + listVirtualPathRowsForDirsFn: listVirtualPathRowsForDirsFn as any, + findVirtualPathsFn: findVirtualPathsFn as any, + handleGrepDirectFn: handleGrepDirectFn as any, + }, + ); + + expect(readVirtualPathContentsFn).toHaveBeenCalledWith(expect.anything(), "memory", "sessions", ["/a.md", "/b.md"]); + expect(listVirtualPathRowsForDirsFn).toHaveBeenCalledWith(expect.anything(), "memory", "sessions", ["/summaries/a", "/summaries/b"]); + expect(handleGrepDirectFn).toHaveBeenCalledTimes(1); + expect(output).toContain("line1\nline2\nline3"); + expect(output).toContain("---"); + expect(output).toContain("drwxr-xr-x"); + expect(output).toContain("group/"); + expect(output).toContain("2"); + expect(output).toContain("/summaries/a/file1.md:needle"); + }); + + it("returns null when a required path is missing", async () => { + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "cat /missing.md", + { + readVirtualPathContentsFn: vi.fn(async () => new Map([["/missing.md", null]])) as any, + }, + ); + expect(output).toBeNull(); + }); + + it("ignores missing files when stderr is redirected to /dev/null", async () => { + const output = await 
executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "cat /missing.md 2>/dev/null", + { + readVirtualPathContentsFn: vi.fn(async () => new Map([["/missing.md", null]])) as any, + }, + ); + expect(output).toBe(""); + }); + + it("ignores only the missing cat inputs and keeps present content", async () => { + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "cat /missing.md /present.md 2>/dev/null", + { + readVirtualPathContentsFn: vi.fn(async () => new Map([ + ["/missing.md", null], + ["/present.md", "ok"], + ])) as any, + }, + ); + expect(output).toBe("ok"); + }); + + it("renders missing directories and supports line-counting", async () => { + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "wc -l /a.md && ls /missing", + { + readVirtualPathContentsFn: vi.fn(async () => new Map([["/a.md", "x\ny\nz"]])) as any, + listVirtualPathRowsForDirsFn: vi.fn(async () => new Map([["/missing", []]])) as any, + }, + ); + expect(output).toContain("3 /a.md"); + expect(output).toContain("No such file or directory"); + }); + + it("renders short ls output, no-match find output, and raw grep output", async () => { + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "ls /summaries/a && find /summaries/a -name '*.txt' && grep needle /summaries/a", + { + listVirtualPathRowsForDirsFn: vi.fn(async () => new Map([ + ["/summaries/a", [{ path: "/summaries/a/file1.md", size_bytes: 10 }]], + ])) as any, + findVirtualPathsFn: vi.fn(async () => []) as any, + handleGrepDirectFn: vi.fn(async () => "/summaries/a/file1.md:needle") as any, + }, + ); + + expect(output).toContain("file1.md"); + expect(output).toContain("(no matches)"); + expect(output).toContain("/summaries/a/file1.md:needle"); + }); + + it("returns joined find results, line-limited grep, and no-match compiled find+grep output", async () 
=> { + const joinedFind = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "find /summaries/a -name '*.md'", + { + findVirtualPathsFn: vi.fn(async () => ["/summaries/a/file1.md", "/summaries/a/file2.md"]) as any, + }, + ); + expect(joinedFind).toBe("/summaries/a/file1.md\n/summaries/a/file2.md"); + + const grepLimited = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "grep needle /summaries/a | head -1", + { + handleGrepDirectFn: vi.fn(async () => "/summaries/a/file1.md:needle\n/summaries/a/file2.md:needle") as any, + }, + ); + expect(grepLimited).toBe("/summaries/a/file1.md:needle"); + + const noMatchFindGrep = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "find /summaries -name '*.md' | xargs grep -l launch", + { + findVirtualPathsFn: vi.fn(async () => []) as any, + }, + ); + expect(noMatchFindGrep).toBe("(no matches)"); + }); + + it("returns null when a compiled grep returns null", async () => { + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "grep needle /summaries/a", + { + handleGrepDirectFn: vi.fn(async () => null) as any, + }, + ); + expect(output).toBeNull(); + }); + + it("compiles find | xargs grep -l | head into batched path reads", async () => { + const findVirtualPathsFn = vi.fn() + .mockResolvedValueOnce(["/summaries/a.md", "/summaries/shared.json"]) + .mockResolvedValueOnce(["/summaries/b.json", "/summaries/shared.json"]); + const readVirtualPathContentsFn = vi.fn(async () => new Map([ + ["/summaries/a.md", "launch timeline and notes"], + ["/summaries/shared.json", "{\"turns\":[{\"speaker\":\"Alice\",\"text\":\"launch update\"}]}"], + ["/summaries/b.json", "No match here"], + ])); + + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "find /summaries -type f -name '*.md' -o -name '*.json' | xargs grep -l 
'launch' | head -1", + { + findVirtualPathsFn: findVirtualPathsFn as any, + readVirtualPathContentsFn: readVirtualPathContentsFn as any, + }, + ); + + expect(findVirtualPathsFn).toHaveBeenCalledTimes(2); + expect(readVirtualPathContentsFn).toHaveBeenCalledWith( + expect.anything(), + "memory", + "sessions", + ["/summaries/a.md", "/summaries/shared.json", "/summaries/b.json"], + ); + expect(output).toBe("/summaries/a.md"); + }); +}); diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index b1276c9..f427bf7 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -1,4 +1,7 @@ import { describe, it, expect, beforeEach, vi, afterEach } from "vitest"; +import { mkdtempSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { DeeplakeApi, WriteRow } from "../../src/deeplake-api.js"; // ��─ Mock fetch ────────────────────────────────────────────────────────────── @@ -20,6 +23,11 @@ function makeApi(table = "test_table") { beforeEach(() => { mockFetch.mockReset(); + process.env.HIVEMIND_INDEX_MARKER_DIR = mkdtempSync(join(tmpdir(), "hivemind-index-marker-")); +}); + +afterEach(() => { + delete process.env.HIVEMIND_INDEX_MARKER_DIR; }); // ── query() ───────────────────────────────────────────────────────────────── @@ -79,6 +87,23 @@ describe("DeeplakeApi.query", () => { expect(rows).toEqual([{ x: "ok" }]); }); + it("retries transient HTML 403s for session inserts", async () => { + mockFetch + .mockResolvedValueOnce({ + ok: false, + status: 403, + json: async () => ({}), + text: async () => "403 Forbiddennginx", + }) + .mockResolvedValueOnce(jsonResponse({})); + const api = makeApi(); + const rows = await api.query( + 'INSERT INTO "sessions" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES (\'id\', \'/p\', \'f\', \'{}\'::jsonb, \'u\', 2, \'p\', \'Stop\', \'claude_code\', \'t\', 
\'t\')', + ); + expect(rows).toEqual([]); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + it("retries on 502/503/504", async () => { mockFetch .mockResolvedValueOnce(jsonResponse("", 502)) @@ -120,6 +145,25 @@ describe("DeeplakeApi.query", () => { await expect(api.query("SELECT 1")).rejects.toThrow("DNS_FAIL"); }); + it("fails fast on timeout-like fetch errors without retrying", async () => { + const timeoutError = new Error("request timed out"); + timeoutError.name = "TimeoutError"; + mockFetch.mockRejectedValueOnce(timeoutError); + const api = makeApi(); + + await expect(api.query("SELECT 1")).rejects.toThrow("Query timeout after 10000ms"); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it("passes an abort signal to query fetches", async () => { + mockFetch.mockResolvedValueOnce(jsonResponse({ columns: ["x"], rows: [["ok"]] })); + const api = makeApi(); + await api.query("SELECT 1"); + + const opts = mockFetch.mock.calls[0][1]; + expect(opts.signal).toBeInstanceOf(AbortSignal); + }); + it("wraps non-Error fetch exceptions", async () => { mockFetch.mockRejectedValue("string error"); const api = makeApi(); @@ -328,6 +372,19 @@ describe("DeeplakeApi.listTables", () => { const api = makeApi(); expect(await api.listTables()).toEqual([]); }); + + it("caches successful results per api instance", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + status: 200, + json: async () => ({ tables: [{ table_name: "memory" }, { table_name: "sessions" }] }), + }); + const api = makeApi(); + + expect(await api.listTables()).toEqual(["memory", "sessions"]); + expect(await api.listTables()).toEqual(["memory", "sessions"]); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); }); // ── ensureTable ───────────────────────────────────────────────────────────── @@ -371,6 +428,28 @@ describe("DeeplakeApi.ensureTable", () => { const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; expect(createSql).toContain("custom_table"); }); + + it("reuses cached 
listTables across ensureTable and ensureSessionsTable", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [{ table_name: "memory" }] }), + }); + mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValueOnce(jsonResponse({})); + const api = makeApi("memory"); + + await api.ensureTable(); + await api.ensureSessionsTable("sessions"); + + expect(mockFetch).toHaveBeenCalledTimes(3); + const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + expect(createSql).toContain("CREATE TABLE IF NOT EXISTS"); + expect(createSql).toContain("sessions"); + const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(indexSql).toContain("\"path\""); + expect(indexSql).toContain("\"creation_date\""); + }); }); // ── ensureSessionsTable ───────────────────────────────────────────────────── @@ -382,6 +461,7 @@ describe("DeeplakeApi.ensureSessionsTable", () => { json: async () => ({ tables: [] }), }); mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValueOnce(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; @@ -389,15 +469,50 @@ describe("DeeplakeApi.ensureSessionsTable", () => { expect(createSql).toContain("sessions"); expect(createSql).toContain("JSONB"); expect(createSql).toContain("USING deeplake"); + const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(indexSql).toContain("\"sessions\""); + expect(indexSql).toContain("(\"path\", \"creation_date\")"); }); - it("does nothing when sessions table already exists", async () => { + it("ensures the lookup index when sessions table already exists", async () => { mockFetch.mockResolvedValueOnce({ ok: true, status: 200, json: async () => ({ tables: [{ table_name: 
"sessions" }] }), }); + mockFetch.mockResolvedValueOnce(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); - expect(mockFetch).toHaveBeenCalledOnce(); + expect(mockFetch).toHaveBeenCalledTimes(2); + const indexSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); + }); + + it("ignores lookup-index creation errors after ensuring the sessions table", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [{ table_name: "sessions" }] }), + }); + mockFetch.mockResolvedValueOnce(jsonResponse("forbidden", 403)); + const api = makeApi(); + + await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it("treats duplicate concurrent index creation errors as success and records a local marker", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [{ table_name: "sessions" }] }), + }); + mockFetch.mockResolvedValueOnce(jsonResponse("duplicate key value violates unique constraint \"pg_class_relname_nsp_index\"", 400)); + + const api = makeApi(); + await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); + + mockFetch.mockReset(); + await api.ensureSessionsTable("sessions"); + + expect(mockFetch).not.toHaveBeenCalled(); }); }); diff --git a/claude-code/tests/deeplake-fs.test.ts b/claude-code/tests/deeplake-fs.test.ts index 4cbf03b..455b86a 100644 --- a/claude-code/tests/deeplake-fs.test.ts +++ b/claude-code/tests/deeplake-fs.test.ts @@ -586,6 +586,63 @@ describe("prefetch", () => { expect(client.query).not.toHaveBeenCalled(); }); + + it("prefetches session-backed files in batches instead of one query per path", async () => { + const sessionMessages = new Map([ + ["/sessions/alice/a.json", [ + { message: "{\"type\":\"user_message\",\"content\":\"hello\"}", creation_date: 
"2026-01-01T00:00:00.000Z" }, + { message: "{\"type\":\"assistant_message\",\"content\":\"hi\"}", creation_date: "2026-01-01T00:00:01.000Z" }, + ]], + ["/sessions/alice/b.json", [ + { message: "{\"type\":\"user_message\",\"content\":\"bye\"}", creation_date: "2026-01-01T00:00:02.000Z" }, + ]], + ]); + + const client = { + ensureTable: vi.fn().mockResolvedValue(undefined), + query: vi.fn(async (sql: string) => { + if (sql.includes("SELECT path, size_bytes, mime_type")) return []; + if (sql.includes("SELECT path, SUM(size_bytes) as total_size")) { + return [...sessionMessages.entries()].map(([path, rows]) => ({ + path, + total_size: rows.reduce((sum, row) => sum + Buffer.byteLength(row.message, "utf-8"), 0), + })); + } + if (sql.includes("SELECT path, message, creation_date")) { + const inMatch = sql.match(/IN \(([^)]+)\)/); + const paths = inMatch + ? inMatch[1].split(",").map((value) => value.trim().replace(/^'|'$/g, "")) + : []; + return paths.flatMap((path) => + (sessionMessages.get(path) ?? 
[]).map((row) => ({ + path, + message: row.message, + creation_date: row.creation_date, + })), + ); + } + if (sql.includes("SELECT message FROM")) return []; + return []; + }), + }; + + const fs = await DeeplakeFs.create(client as never, "memory", "/", "sessions"); + client.query.mockClear(); + + await fs.prefetch(["/sessions/alice/a.json", "/sessions/alice/b.json"]); + + const prefetchCalls = (client.query.mock.calls as [string][]).filter( + ([sql]) => sql.includes("SELECT path, message, creation_date") && sql.includes("IN ("), + ); + expect(prefetchCalls).toHaveLength(1); + expect(prefetchCalls[0][0]).toContain("/sessions/alice/a.json"); + expect(prefetchCalls[0][0]).toContain("/sessions/alice/b.json"); + + client.query.mockClear(); + expect(await fs.readFile("/sessions/alice/a.json")).toBe("[user] hello\n[assistant] hi"); + expect(await fs.readFile("/sessions/alice/b.json")).toBe("[user] bye"); + expect(client.query).not.toHaveBeenCalled(); + }); }); // ── Upsert: id stability & dates ───────────────────────────────────────────── diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 4a3a860..51339ff 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -1,8 +1,12 @@ import { describe, it, expect, vi } from "vitest"; import { + buildGrepSearchOptions, normalizeContent, buildPathFilter, + buildPathFilterForTargets, compileGrepRegex, + extractRegexAlternationPrefilters, + extractRegexLiteralPrefilter, refineGrepMatches, searchDeeplakeTables, grepBothTables, @@ -32,31 +36,31 @@ describe("normalizeContent: passthrough for non-session paths", () => { }); }); -describe("normalizeContent: LoCoMo benchmark shape", () => { +describe("normalizeContent: turn-array session shape", () => { const raw = JSON.stringify({ date_time: "1:56 pm on 8 May, 2023", - speakers: { speaker_a: "Caroline", speaker_b: "Melanie" }, + speakers: { speaker_a: "Avery", speaker_b: "Jordan" }, turns: [ - { dia_id: "D1:1", 
speaker: "Caroline", text: "Hey Mel!" }, - { dia_id: "D1:2", speaker: "Melanie", text: "Hi Caroline." }, + { dia_id: "D1:1", speaker: "Avery", text: "Hey Jordan!" }, + { dia_id: "D1:2", speaker: "Jordan", text: "Hi Avery." }, ], }); it("emits date and speakers header", () => { - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).toContain("date: 1:56 pm on 8 May, 2023"); - expect(out).toContain("speakers: Caroline, Melanie"); + expect(out).toContain("speakers: Avery, Jordan"); }); it("emits one line per turn with dia_id tag", () => { - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); - expect(out).toContain("[D1:1] Caroline: Hey Mel!"); - expect(out).toContain("[D1:2] Melanie: Hi Caroline."); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); + expect(out).toContain("[D1:1] Avery: Hey Jordan!"); + expect(out).toContain("[D1:2] Jordan: Hi Avery."); }); it("falls back gracefully on turns without speaker/text", () => { const weird = JSON.stringify({ turns: [{}, { speaker: "X" }] }); - const out = normalizeContent("/sessions/conv_0_session_1.json", weird); + const out = normalizeContent("/sessions/alice/chat_1.json", weird); // Must not crash; includes placeholder `?` for missing speaker expect(out).toContain("?: "); expect(out).toContain("X: "); @@ -67,7 +71,7 @@ describe("normalizeContent: LoCoMo benchmark shape", () => { turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: "", speaker_b: "" }, }); - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).not.toContain("speakers:"); expect(out).toContain("A: hi"); }); @@ -77,32 +81,32 @@ describe("normalizeContent: LoCoMo benchmark shape", () => { turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: "Alice" }, }); - const out = 
normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).toContain("speakers: Alice"); }); it("falls back speaker->name when speaker field is absent on a turn", () => { - const raw = JSON.stringify({ turns: [{ name: "Caroline", text: "hi" }] }); - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); - expect(out).toContain("Caroline: hi"); + const raw = JSON.stringify({ turns: [{ name: "Avery", text: "hi" }] }); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); + expect(out).toContain("Avery: hi"); }); it("falls back text->content when text field is absent on a turn", () => { const raw = JSON.stringify({ turns: [{ speaker: "X", content: "fallback" }] }); - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).toContain("X: fallback"); }); it("omits dia_id prefix when the turn has no dia_id", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }] }); - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).toContain("A: hi"); expect(out).not.toMatch(/\[\]/); }); it("emits turns without date/speakers when both are missing", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }] }); - const out = normalizeContent("/sessions/conv_0_session_1.json", raw); + const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).not.toContain("date:"); expect(out).not.toContain("speakers:"); expect(out).toContain("A: hi"); @@ -111,7 +115,7 @@ describe("normalizeContent: LoCoMo benchmark shape", () => { it("returns raw when turns produce an empty serialization", () => { const empty = JSON.stringify({ turns: [] }); // No header, no turns → trimmed output is empty → fallback to raw - const out = 
normalizeContent("/sessions/conv_0_session_1.json", empty); + const out = normalizeContent("/sessions/alice/chat_1.json", empty); expect(out).toBe(empty); }); }); @@ -428,15 +432,49 @@ describe("buildPathFilter", () => { expect(buildPathFilter("")).toBe(""); }); it("emits equality + prefix match for subpaths", () => { - const f = buildPathFilter("/summaries/locomo"); - expect(f).toContain("path = '/summaries/locomo'"); - expect(f).toContain("path LIKE '/summaries/locomo/%'"); + const f = buildPathFilter("/summaries/projects"); + expect(f).toContain("path = '/summaries/projects'"); + expect(f).toContain("path LIKE '/summaries/projects/%'"); }); it("strips trailing slashes", () => { const f = buildPathFilter("/sessions///"); expect(f).toContain("path = '/sessions'"); expect(f).toContain("path LIKE '/sessions/%'"); }); + it("uses exact matching for likely file targets", () => { + expect(buildPathFilter("/summaries/alice/s1.md")).toBe( + " AND path = '/summaries/alice/s1.md'", + ); + }); + it("uses LIKE matching for glob targets instead of exact file matching", () => { + // Fix #4 appends `ESCAPE '\'` so sqlLike-escaped underscores (`\_`) and + // percent signs (`\%`) in the pattern match their literal characters on + // the Deeplake backend. Without the ESCAPE clause `\_` was treated as + // two literal characters and `/sessions/conv_0_session_*.json`-style + // globs silently returned zero rows. 
+ expect(buildPathFilter("/summaries/projects/*.md")).toBe( + " AND path LIKE '/summaries/projects/%.md' ESCAPE '\\'", + ); + const filter = buildPathFilter("/sessions/alice/chat_?.json"); + expect(filter).toMatch(/^ AND path LIKE '\/sessions\/alice\/chat.*\.json' ESCAPE '\\'$/); + }); +}); + +describe("buildPathFilterForTargets", () => { + it("returns empty string when any target is root", () => { + expect(buildPathFilterForTargets(["/summaries", "/"])).toBe(""); + }); + + it("joins multiple target filters into one OR clause", () => { + const filter = buildPathFilterForTargets([ + "/summaries/alice", + "/sessions/bob/chat.jsonl", + ]); + expect(filter).toContain("path = '/summaries/alice'"); + expect(filter).toContain("path LIKE '/summaries/alice/%'"); + expect(filter).toContain("path = '/sessions/bob/chat.jsonl'"); + expect(filter).toContain(" OR "); + }); }); // ── compileGrepRegex ──────────────────────────────────────────────────────── @@ -569,15 +607,14 @@ describe("refineGrepMatches", () => { // ── searchDeeplakeTables ───────────────────────────────────────────────────── describe("searchDeeplakeTables", () => { - function mockApi(memRows: unknown[], sessRows: unknown[]) { + function mockApi(rows: unknown[]) { const query = vi.fn() - .mockImplementationOnce(async () => memRows) - .mockImplementationOnce(async () => sessRows); + .mockImplementationOnce(async () => rows); return { query } as any; } - it("issues one LIKE query per table with the escaped pattern and path filter", async () => { - const api = mockApi([], []); + it("issues one UNION ALL query with the escaped pattern and path filter", async () => { + const api = mockApi([]); await searchDeeplakeTables(api, "memory", "sessions", { pathFilter: " AND (path = '/x' OR path LIKE '/x/%')", contentScanOnly: false, @@ -585,33 +622,64 @@ describe("searchDeeplakeTables", () => { escapedPattern: "foo", limit: 50, }); - expect(api.query).toHaveBeenCalledTimes(2); - const [memCall, sessCall] = 
api.query.mock.calls.map((c: unknown[]) => c[0] as string); - expect(memCall).toContain('FROM "memory"'); - expect(memCall).toContain("summary::text ILIKE '%foo%'"); - expect(memCall).toContain("LIMIT 50"); - expect(sessCall).toContain('FROM "sessions"'); - expect(sessCall).toContain("message::text ILIKE '%foo%'"); + expect(api.query).toHaveBeenCalledTimes(1); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain('FROM "memory"'); + expect(sql).toContain('FROM "sessions"'); + expect(sql).toContain("summary::text ILIKE '%foo%'"); + expect(sql).toContain("message::text ILIKE '%foo%'"); + expect(sql).toContain("LIMIT 50"); + expect(sql).toContain("UNION ALL"); }); it("skips LIKE filter when contentScanOnly is true (regex-in-memory mode)", async () => { - const api = mockApi([], []); + const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: true, likeOp: "LIKE", escapedPattern: "anything", }); - const [memCall, sessCall] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); - expect(memCall).not.toContain("LIKE"); - expect(sessCall).not.toContain("LIKE"); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).not.toContain("summary::text LIKE"); + expect(sql).not.toContain("message::text LIKE"); + }); + + it("uses a safe literal prefilter for regex scans when available", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: true, + likeOp: "LIKE", + escapedPattern: "foo.*bar", + prefilterPattern: "foo", + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("summary::text LIKE '%foo%'"); + expect(sql).toContain("message::text LIKE '%foo%'"); + }); + + it("expands alternation prefilters into OR clauses instead of literal pipes", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: true, + likeOp: "LIKE", + 
escapedPattern: "relationship|partner|married", + prefilterPatterns: ["relationship", "partner", "married"], + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("summary::text LIKE '%relationship%'"); + expect(sql).toContain("summary::text LIKE '%partner%'"); + expect(sql).toContain("summary::text LIKE '%married%'"); + expect(sql).not.toContain("relationship|partner|married"); }); it("concatenates rows from both tables into {path, content}", async () => { - const api = mockApi( - [{ path: "/summaries/a", content: "aaa" }], - [{ path: "/sessions/b", content: "bbb" }], - ); + const api = mockApi([ + { path: "/summaries/a", content: "aaa" }, + { path: "/sessions/b", content: "bbb" }, + ]); const rows = await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", }); @@ -622,7 +690,7 @@ describe("searchDeeplakeTables", () => { }); it("tolerates null content on memory row (coerces to empty string)", async () => { - const api = mockApi([{ path: "/a", content: null }], []); + const api = mockApi([{ path: "/a", content: null }]); const rows = await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", }); @@ -630,35 +698,22 @@ describe("searchDeeplakeTables", () => { }); it("tolerates null content on sessions row too", async () => { - const api = mockApi([], [{ path: "/b", content: null }]); + const api = mockApi([{ path: "/b", content: null }]); const rows = await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", }); expect(rows[0]).toEqual({ path: "/b", content: "" }); }); - it("returns partial results when the sessions query fails", async () => { + it("keeps grep on a single SQL query when the union query fails", async () => { const api = { query: vi.fn() - .mockImplementationOnce(async () => [{ path: "/a", content: "ok" }]) - .mockImplementationOnce(async 
() => { throw new Error("boom"); }), + .mockRejectedValueOnce(new Error("bad union")) } as any; - const rows = await searchDeeplakeTables(api, "m", "s", { + await expect(searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", - }); - expect(rows).toEqual([{ path: "/a", content: "ok" }]); - }); - - it("returns partial results when the memory query fails", async () => { - const api = { - query: vi.fn() - .mockImplementationOnce(async () => { throw new Error("boom"); }) - .mockImplementationOnce(async () => [{ path: "/b", content: "ok" }]), - } as any; - const rows = await searchDeeplakeTables(api, "m", "s", { - pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", - }); - expect(rows).toEqual([{ path: "/b", content: "ok" }]); + })).rejects.toThrow("bad union"); + expect(api.query).toHaveBeenCalledTimes(1); }); it("defaults limit to 100 when omitted", async () => { @@ -677,8 +732,7 @@ describe("grepBothTables", () => { function mockApi(rows: unknown[]) { return { query: vi.fn() - .mockResolvedValueOnce(rows) // memory - .mockResolvedValueOnce([]), // sessions (empty in these tests) + .mockResolvedValueOnce(rows), } as any; } @@ -698,44 +752,151 @@ describe("grepBothTables", () => { it("deduplicates rows by path when memory and sessions return the same path", async () => { const api = { query: vi.fn() - .mockResolvedValueOnce([{ path: "/shared", content: "foo" }]) - .mockResolvedValueOnce([{ path: "/shared", content: "foo" }]), + .mockResolvedValueOnce([{ path: "/shared", content: "foo" }, { path: "/shared", content: "foo" }]), } as any; const out = await grepBothTables(api, "m", "s", baseParams, "/"); // only one line for the shared path expect(out.length).toBe(1); }); - it("normalizes session JSON before refinement (LoCoMo turns)", async () => { + it("normalizes session JSON before refinement (turn-array sessions)", async () => { const sessionContent = JSON.stringify({ turns: [ - { 
dia_id: "D1:1", speaker: "Alice", text: "greeting foo here" }, + { dia_id: "D1:1", speaker: "Alice", text: "project foo update" }, { dia_id: "D1:2", speaker: "Bob", text: "unrelated" }, ], }); const api = { query: vi.fn() - .mockResolvedValueOnce([]) - .mockResolvedValueOnce([{ path: "/sessions/conv_0_session_1.json", content: sessionContent }]), + .mockResolvedValueOnce([{ path: "/sessions/alice/chat_1.json", content: sessionContent }]), } as any; const out = await grepBothTables(api, "m", "s", baseParams, "/"); // Only the matching turn is returned, not the whole JSON blob - expect(out.some(l => l.includes("[D1:1] Alice: greeting foo here"))).toBe(true); + expect(out.some(l => l.includes("[D1:1] Alice: project foo update"))).toBe(true); expect(out.some(l => l.includes("unrelated"))).toBe(false); }); it("uses contentScanOnly when pattern has regex metacharacters", async () => { const api = mockApi([{ path: "/a", content: "this is a test" }]); await grepBothTables(api, "m", "s", { ...baseParams, pattern: "t.*t" }, "/"); - const [memSql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); - expect(memSql).not.toContain("ILIKE"); - expect(memSql).not.toContain("summary::text LIKE"); + const [sql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); + expect(sql).not.toContain("summary::text LIKE"); + expect(sql).not.toContain("message::text LIKE"); + }); + + it("adds a safe literal prefilter for wildcard regexes with stable anchors", async () => { + const api = mockApi([{ path: "/a", content: "foo middle bar" }]); + await grepBothTables(api, "m", "s", { ...baseParams, pattern: "foo.*bar" }, "/"); + const [sql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); + expect(sql).toContain("summary::text LIKE '%foo%'"); }); it("routes to ILIKE when ignoreCase is set", async () => { const api = mockApi([]); await grepBothTables(api, "m", "s", { ...baseParams, ignoreCase: true }, "/"); - const [memSql] = api.query.mock.calls.map((c: unknown[]) 
=> c[0] as string); - expect(memSql).toContain("ILIKE"); + const [sql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); + expect(sql).toContain("ILIKE"); + }); + + it("uses a single union query even for scoped target paths", async () => { + const api = mockApi([{ path: "/summaries/a.md", content: "foo line" }]); + await grepBothTables(api, "memory", "sessions", baseParams, "/summaries"); + expect(api.query).toHaveBeenCalledTimes(1); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain('FROM "memory"'); + expect(sql).toContain('FROM "sessions"'); + expect(sql).toContain("UNION ALL"); + }); +}); + +describe("regex literal prefilter", () => { + it("returns null for an empty pattern", () => { + expect(extractRegexLiteralPrefilter("")).toBeNull(); + }); + + it("extracts a literal from simple wildcard regexes", () => { + expect(extractRegexLiteralPrefilter("foo.*bar")).toBe("foo"); + expect(extractRegexLiteralPrefilter("prefix.*suffix")).toBe("prefix"); + expect(extractRegexLiteralPrefilter("x.*suffix")).toBe("suffix"); + }); + + it("returns null for complex regex features", () => { + expect(extractRegexLiteralPrefilter("colou?r")).toBeNull(); + expect(extractRegexLiteralPrefilter("foo|bar")).toBeNull(); + expect(extractRegexLiteralPrefilter("[ab]foo")).toBeNull(); + }); + + it("handles escaped literals and rejects dangling escapes or bare dots", () => { + expect(extractRegexLiteralPrefilter("foo\\.bar")).toBe("foo.bar"); + expect(extractRegexLiteralPrefilter("\\d+foo")).toBeNull(); + expect(extractRegexLiteralPrefilter("foo\\")).toBeNull(); + expect(extractRegexLiteralPrefilter("foo.bar")).toBeNull(); + }); + + it("builds grep search options with regex prefilter when safe", () => { + const opts = buildGrepSearchOptions({ + pattern: "foo.*bar", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/summaries"); + + 
expect(opts.contentScanOnly).toBe(true); + expect(opts.likeOp).toBe("ILIKE"); + expect(opts.prefilterPattern).toBe("foo"); + expect(opts.pathFilter).toContain("/summaries"); + }); + + it("extracts safe alternation anchors and carries them into grep search options", () => { + expect(extractRegexAlternationPrefilters("relationship|partner|married")).toEqual([ + "relationship", + "partner", + "married", + ]); + + const opts = buildGrepSearchOptions({ + pattern: "relationship|partner|married", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/summaries"); + + expect(opts.contentScanOnly).toBe(true); + expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]); + }); + + it("rejects alternation prefilters when grouping makes them unsafe", () => { + expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull(); + expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]); + }); + + it("preserves escaped alternation characters inside a literal branch", () => { + expect(extractRegexAlternationPrefilters("foo\\|bar|baz")).toEqual(["foo|bar", "baz"]); + expect(extractRegexAlternationPrefilters("foo|bar\\.md")).toEqual(["foo", "bar.md"]); + }); + + it("keeps fixed-string searches on the SQL-filtered path even with regex metacharacters", () => { + const opts = buildGrepSearchOptions({ + pattern: "foo.*bar", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: true, + }, "/summaries/alice/s1.md"); + + expect(opts.contentScanOnly).toBe(false); + expect(opts.prefilterPattern).toBeUndefined(); + expect(opts.pathFilter).toBe(" AND path = '/summaries/alice/s1.md'"); }); }); diff --git a/claude-code/tests/grep-direct.test.ts b/claude-code/tests/grep-direct.test.ts index df74a0d..0f56c9a 100644 --- a/claude-code/tests/grep-direct.test.ts +++ 
b/claude-code/tests/grep-direct.test.ts @@ -8,16 +8,14 @@ describe("handleGrepDirect", () => { lineNumber: false, invertMatch: false, fixedString: false, }; - function mockApi(mem: unknown[], sess: unknown[]) { + function mockApi(rows: unknown[]) { return { - query: vi.fn() - .mockImplementationOnce(async () => mem) - .mockImplementationOnce(async () => sess), + query: vi.fn().mockImplementationOnce(async () => rows), } as any; } it("returns null when pattern is empty", async () => { - const api = mockApi([], []); + const api = mockApi([]); const r = await handleGrepDirect(api, "memory", "sessions", { ...baseParams, pattern: "" }); expect(r).toBeNull(); expect(api.query).not.toHaveBeenCalled(); @@ -26,30 +24,29 @@ describe("handleGrepDirect", () => { it("delegates to grepBothTables and joins the match lines", async () => { const api = mockApi( [{ path: "/summaries/a.md", content: "foo line here\nbar line" }], - [], ); const r = await handleGrepDirect(api, "memory", "sessions", baseParams); expect(r).toBe("foo line here"); }); it("emits '(no matches)' when both tables return nothing", async () => { - const api = mockApi([], []); + const api = mockApi([]); const r = await handleGrepDirect(api, "memory", "sessions", baseParams); expect(r).toBe("(no matches)"); }); it("merges results from both memory and sessions", async () => { - const api = mockApi( - [{ path: "/summaries/a.md", content: "foo in summary" }], - [{ path: "/sessions/b.jsonl", content: "foo in session" }], - ); + const api = mockApi([ + { path: "/summaries/a.md", content: "foo in summary" }, + { path: "/sessions/b.jsonl", content: "foo in session" }, + ]); const r = await handleGrepDirect(api, "memory", "sessions", baseParams); expect(r).toContain("/summaries/a.md:foo in summary"); expect(r).toContain("/sessions/b.jsonl:foo in session"); }); it("applies ignoreCase flag at SQL level (ILIKE)", async () => { - const api = mockApi([{ path: "/a", content: "Foo" }], []); + const api = mockApi([{ path: "/a", 
content: "Foo" }]); await handleGrepDirect(api, "memory", "sessions", { ...baseParams, ignoreCase: true }); const sql = api.query.mock.calls[0][0] as string; expect(sql).toContain("ILIKE"); @@ -93,6 +90,13 @@ describe("parseBashGrep: long options", () => { expect(r).not.toBeNull(); expect(r!.pattern).toBe("foo"); }); + + it("accepts grep no-op long options that take inline numeric values", () => { + const r = parseBashGrep("grep --after-context=2 --before-context=3 --context=4 --max-count=1 foo /x"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("foo"); + expect(r!.targetPath).toBe("/x"); + }); }); @@ -140,6 +144,10 @@ describe("parseBashGrep", () => { expect(parseBashGrep("grep -r")).toBeNull(); }); + it("returns null for unterminated quoted commands", () => { + expect(parseBashGrep('grep "unterminated /dir')).toBeNull(); + }); + // ── Flag parsing ── it("parses -i flag", () => { @@ -227,4 +235,100 @@ describe("parseBashGrep", () => { expect(r!.pattern).toBe("pattern"); expect(r!.targetPath).toBe("/dir"); }); + + it("does not split on alternation pipes inside quotes", () => { + const r = parseBashGrep("grep 'book|read' /dir | head -5"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("book|read"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("keeps escaped spaces inside unquoted patterns", () => { + const r = parseBashGrep("grep Melanie\\ sunrise /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("Melanie sunrise"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("consumes -A numeric values without treating them as paths", () => { + const r = parseBashGrep("grep -A 5 'Caroline' /summaries/"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("Caroline"); + expect(r!.targetPath).toBe("/summaries/"); + }); + + it("consumes attached -B numeric values without shifting the target path", () => { + const r = parseBashGrep("grep -B5 'friends' /sessions/"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("friends"); + 
expect(r!.targetPath).toBe("/sessions/"); + }); + + it("consumes -m values without shifting the target path", () => { + const r = parseBashGrep("grep -m 1 'single' /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("single"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("uses -e as the explicit pattern source", () => { + const r = parseBashGrep("grep -e 'book|read' /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("book|read"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("uses inline -e values as the explicit pattern source", () => { + const r = parseBashGrep("grep -ebook /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("book"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("uses --regexp= as the explicit pattern source", () => { + const r = parseBashGrep("grep --regexp=book\\|read /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("book|read"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("defaults explicit -e searches to / when no target path is given", () => { + const r = parseBashGrep("grep -e 'book|read'"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("book|read"); + expect(r!.targetPath).toBe("/"); + }); + + it("returns null when a value-taking long option is missing its value", () => { + expect(parseBashGrep("grep --after-context")).toBeNull(); + }); + + it("returns null when -A is missing its value", () => { + expect(parseBashGrep("grep -A")).toBeNull(); + }); + + it("returns null when -e is missing its value", () => { + expect(parseBashGrep("grep -e")).toBeNull(); + }); + + it("tolerates unknown short flags without crashing", () => { + const r = parseBashGrep("grep -Z foo /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("foo"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("preserves escaped pipes outside quotes as part of the pattern", () => { + const r = parseBashGrep("grep foo\\|bar /dir | head -5"); + expect(r).not.toBeNull(); + 
expect(r!.pattern).toBe("foo|bar"); + expect(r!.targetPath).toBe("/dir"); + }); + + it("preserves escaped quotes inside double-quoted patterns", () => { + const r = parseBashGrep('grep "foo\\"bar" /dir'); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe('foo"bar'); + expect(r!.targetPath).toBe("/dir"); + }); }); diff --git a/claude-code/tests/grep-interceptor.test.ts b/claude-code/tests/grep-interceptor.test.ts index a2584ce..ba7e67b 100644 --- a/claude-code/tests/grep-interceptor.test.ts +++ b/claude-code/tests/grep-interceptor.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, vi } from "vitest"; import { createGrepCommand } from "../../src/shell/grep-interceptor.js"; import { DeeplakeFs } from "../../src/shell/deeplake-fs.js"; +import * as grepCore from "../../src/shell/grep-core.js"; // ── Minimal mocks ───────────────────────────────────────────────────────────── function makeClient(queryResults: Record[] = []) { @@ -30,6 +31,31 @@ function makeCtx(fs: DeeplakeFs, cwd = "/memory") { // cache. Tests below assert that new contract. 
describe("grep interceptor", () => { + it("returns exitCode=1 when the pattern is missing", async () => { + const client = makeClient(); + const fs = await DeeplakeFs.create(client as never, "test", "/memory"); + client.query.mockClear(); + const cmd = createGrepCommand(client as never, fs, "test"); + const result = await cmd.execute([], makeCtx(fs) as never); + expect(result).toEqual({ + stdout: "", + stderr: "grep: missing pattern\n", + exitCode: 1, + }); + expect(client.query).not.toHaveBeenCalled(); + }); + + it("returns exitCode=1 when all target paths resolve to nothing", async () => { + const client = makeClient(); + const fs = await DeeplakeFs.create(client as never, "test", "/memory"); + vi.spyOn(fs, "resolvePath").mockReturnValue(""); + client.query.mockClear(); + const cmd = createGrepCommand(client as never, fs, "test"); + const result = await cmd.execute(["foo", "missing"], makeCtx(fs) as never); + expect(result).toEqual({ stdout: "", stderr: "", exitCode: 1 }); + expect(client.query).not.toHaveBeenCalled(); + }); + it("returns exitCode=127 for paths outside mount (pass-through)", async () => { const client = makeClient(); const fs = await DeeplakeFs.create(client as never, "test", "/memory"); @@ -44,13 +70,11 @@ describe("grep interceptor", () => { const client = makeClient([{ path: "/memory/a.txt", content: "hello world" }]); const fs = await DeeplakeFs.create(client as never, "test", "/memory"); client.query.mockClear(); - // Both mem and sess queries should run; return matching content for both. 
client.query.mockResolvedValue([{ path: "/memory/a.txt", content: "hello world" }]); const cmd = createGrepCommand(client as never, fs, "test", "sessions"); const result = await cmd.execute(["hello", "/memory"], makeCtx(fs) as never); - // At least one call for memory + one for sessions const sqls = client.query.mock.calls.map((c: unknown[]) => c[0] as string); expect(sqls.some(s => /FROM "test"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); expect(sqls.some(s => /FROM "sessions"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); @@ -60,6 +84,24 @@ describe("grep interceptor", () => { expect(result.exitCode).toBe(0); }); + it("uses one SQL query even when grep receives multiple target paths", async () => { + const client = makeClient([{ path: "/memory/a.txt", content: "hello world" }]); + const fs = await DeeplakeFs.create(client as never, "test", "/memory"); + client.query.mockClear(); + client.query.mockResolvedValue([{ path: "/memory/a.txt", content: "hello world" }]); + + const cmd = createGrepCommand(client as never, fs, "test", "sessions"); + const result = await cmd.execute(["hello", "/memory/a", "/memory/b"], makeCtx(fs) as never); + + expect(client.query).toHaveBeenCalledTimes(1); + const sql = client.query.mock.calls[0][0] as string; + expect(sql).toContain('FROM "test"'); + expect(sql).toContain('FROM "sessions"'); + expect(sql).toContain("path = '/memory/a'"); + expect(sql).toContain("path = '/memory/b'"); + expect(result.exitCode).toBe(0); + }); + it("falls back to in-memory scan when SQL returns nothing", async () => { const client = makeClient([]); const fs = await DeeplakeFs.create(client as never, "test", "/memory"); @@ -162,4 +204,17 @@ describe("grep interceptor", () => { expect.arrayContaining(["/memory/a.txt", "/memory/b.txt"]) ); }); + + it("falls back to the FS cache when the SQL search rejects", async () => { + const client = makeClient(); + const fs = await DeeplakeFs.create(client as never, "test", "/memory"); + await 
fs.writeFile("/memory/a.txt", "hello world"); + vi.spyOn(grepCore, "searchDeeplakeTables").mockRejectedValueOnce(new Error("timeout")); + + const cmd = createGrepCommand(client as never, fs, "test"); + const result = await cmd.execute(["hello", "/memory"], makeCtx(fs) as never); + + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("hello world"); + }); }); diff --git a/claude-code/tests/output-cap.test.ts b/claude-code/tests/output-cap.test.ts new file mode 100644 index 0000000..28756bd --- /dev/null +++ b/claude-code/tests/output-cap.test.ts @@ -0,0 +1,166 @@ +/** + * Cap for large tool outputs (fix #5). + * + * Claude Code's Bash tool silently persists tool_result strings larger + * than ~16 KB to disk and shows the model a 2 KB preview plus a path. + * In the locomo baseline_cloud_100qa_fix123 run, 11 of 14 losing QAs + * that hit this path never recovered the persisted file — the preview + * was too small to carry the answer and the model gave up. `capOutput- + * ForClaude` truncates at line boundaries below Claude Code's threshold + * and replaces the tail with a footer that tells the model how to + * refine the next call. 
+ */ + +import { describe, expect, it } from "vitest"; +import { + CLAUDE_OUTPUT_CAP_BYTES, + capOutputForClaude, +} from "../../src/utils/output-cap.js"; + +describe("capOutputForClaude", () => { + it("returns the input unchanged when it fits under the cap", () => { + const short = "line1\nline2\nline3"; + expect(capOutputForClaude(short)).toBe(short); + }); + + it("is a no-op for an empty string and single short line", () => { + expect(capOutputForClaude("")).toBe(""); + expect(capOutputForClaude("hello")).toBe("hello"); + }); + + it("truncates at a line boundary once the input exceeds the cap", () => { + const line = "x".repeat(100); + const input = Array.from({ length: 200 }, (_, i) => `${i}:${line}`).join("\n"); + const out = capOutputForClaude(input, { kind: "grep" }); + + expect(Buffer.byteLength(out, "utf8")).toBeLessThanOrEqual(CLAUDE_OUTPUT_CAP_BYTES); + // Last surviving line must be whole — no dangling partial line before the footer. + const body = out.split("\n... [")[0]; + expect(body.split("\n").every((l) => l.startsWith(""))).toBe(true); + // Footer names the kind and reports elided line count / byte count. + expect(out).toMatch(/\[grep truncated: \d+ more lines \([\d.]+ KB\) elided — refine with '\| head -N' or a tighter pattern\]/); + }); + + it("reports the correct number of elided lines in the footer", () => { + const line = "x".repeat(100); + const input = Array.from({ length: 500 }, () => line).join("\n"); + const out = capOutputForClaude(input, { kind: "cat" }); + + const bodyLines = out.split("\n... [")[0].split("\n").length; + const footerMatch = out.match(/(\d+) more lines/); + expect(footerMatch).not.toBeNull(); + const elided = Number(footerMatch![1]); + // Body + elided should account for all original lines. + expect(bodyLines + elided).toBe(500); + }); + + it("handles a single oversized line by taking a byte prefix", () => { + // One giant line — no newlines to cut on. 
+ const input = "a".repeat(CLAUDE_OUTPUT_CAP_BYTES * 3); + const out = capOutputForClaude(input, { kind: "grep" }); + + expect(Buffer.byteLength(out, "utf8")).toBeLessThanOrEqual(CLAUDE_OUTPUT_CAP_BYTES); + expect(out).toContain("[grep truncated:"); + expect(out).toMatch(/[\d.]+ KB total/); + }); + + // Regression guard for PR #64 review comment: naive `Buffer.slice(0, budget)` + // can cut a multi-byte UTF-8 sequence in half, and `.toString("utf8")` then + // inserts U+FFFD replacement characters at the tail of the output. The cap + // backs up to the nearest valid UTF-8 start byte before decoding. + + it("single-line truncation never produces U+FFFD replacement characters", () => { + // Each "©" is 2 bytes (c2 a9). Fill with enough of them that the byte + // budget lands inside one — the previous implementation would slice mid- + // sequence and leak at least one U+FFFD; the fix backs up and emits a + // clean prefix. + const input = "©".repeat(10_000); + expect(Buffer.byteLength(input, "utf8")).toBeGreaterThan(CLAUDE_OUTPUT_CAP_BYTES); + const out = capOutputForClaude(input, { kind: "grep" }); + + // Body is "\n... [grep truncated: …]". The prefix must be clean. + const prefix = out.split("\n... [grep truncated:")[0]; + expect(prefix).not.toContain("\uFFFD"); + // And still useful — we kept ~most of the budget worth of characters. + expect(prefix.length).toBeGreaterThan(CLAUDE_OUTPUT_CAP_BYTES / 4); + }); + + it("multi-byte content with newlines still truncates on line boundaries without corruption", () => { + // Each line is "© ©".repeat(60) ≈ 240 bytes. 100 lines → 24 KB, exceeds + // the cap; truncation happens at a newline boundary so no multi-byte + // split is even attempted, but we still assert cleanliness. 
+ const line = "© ©".repeat(60); + const input = Array.from({ length: 100 }, () => line).join("\n"); + const out = capOutputForClaude(input, { kind: "grep" }); + expect(out).not.toContain("\uFFFD"); + expect(Buffer.byteLength(out, "utf8")).toBeLessThanOrEqual(CLAUDE_OUTPUT_CAP_BYTES); + }); + + it("uses a custom maxBytes when provided", () => { + const input = Array.from({ length: 20 }, (_, i) => `line${i}:${"x".repeat(80)}`).join("\n"); + const out = capOutputForClaude(input, { maxBytes: 500, kind: "ls" }); + + expect(Buffer.byteLength(out, "utf8")).toBeLessThanOrEqual(500); + expect(out).toContain("[ls truncated:"); + }); + + it("defaults the footer kind to 'output' when no kind is provided", () => { + const input = "x".repeat(CLAUDE_OUTPUT_CAP_BYTES * 2); + const out = capOutputForClaude(input); + expect(out).toContain("[output truncated:"); + }); + + it("produces output well under Claude Code's ~16 KB persist threshold", () => { + const bigGrepLine = (i: number) => + `/sessions/conv_${i % 10}_session_${i}.json:[D${i}:1] Caroline: ${"x".repeat(160)}`; + const input = Array.from({ length: 400 }, (_, i) => bigGrepLine(i)).join("\n"); + const inputSize = Buffer.byteLength(input, "utf8"); + expect(inputSize).toBeGreaterThan(16 * 1024); // confirm the fixture triggers truncation + + const out = capOutputForClaude(input, { kind: "grep" }); + // 2 KB preview was the painful case — we must give the model notably more + // than that, but still fit comfortably below the 16 KB persist threshold. + expect(Buffer.byteLength(out, "utf8")).toBeGreaterThan(4 * 1024); + expect(Buffer.byteLength(out, "utf8")).toBeLessThanOrEqual(CLAUDE_OUTPUT_CAP_BYTES); + }); + + // ── Regression: trailing newline shouldn't inflate the elided-line count ── + // + // `output.split("\n")` on "a\nb\n" returns ["a", "b", ""]. 
Treating the + // trailing empty entry as a "real" line made the footer's "N more lines + // elided" number off by one whenever the original input ended with a + // newline (which grep and cat both do in practice). + + it("does not count a trailing newline as an extra line when reporting elided lines", () => { + const line = "x".repeat(100); + // 500 real content lines followed by a terminating "\n". Input ends with \n. + const input = Array.from({ length: 500 }, () => line).join("\n") + "\n"; + const out = capOutputForClaude(input, { kind: "grep" }); + + const footerMatch = out.match(/(\d+) more lines/); + expect(footerMatch).not.toBeNull(); + const elided = Number(footerMatch![1]); + + // Parse the kept-body to count surviving real lines. Split produces a + // trailing "" entry when the kept body itself ends with a newline; drop + // it the same way the production code does. + const body = out.split("\n... [")[0]; + const bodySplit = body.split("\n"); + const keptLines = bodySplit[bodySplit.length - 1] === "" ? bodySplit.length - 1 : bodySplit.length; + + // The 500 real lines must be accounted for exactly once — no double + // counting of the trailing newline. + expect(keptLines + elided).toBe(500); + }); + + it("the elided count matches exactly when there is no trailing newline", () => { + const line = "x".repeat(100); + const input = Array.from({ length: 500 }, () => line).join("\n"); // no trailing \n + const out = capOutputForClaude(input, { kind: "grep" }); + + const bodyLines = out.split("\n... 
[")[0].split("\n").length; + const footerMatch = out.match(/(\d+) more lines/); + expect(footerMatch).not.toBeNull(); + expect(bodyLines + Number(footerMatch![1])).toBe(500); + }); +}); diff --git a/claude-code/tests/pre-tool-use-baseline-cloud.test.ts b/claude-code/tests/pre-tool-use-baseline-cloud.test.ts new file mode 100644 index 0000000..f07831a --- /dev/null +++ b/claude-code/tests/pre-tool-use-baseline-cloud.test.ts @@ -0,0 +1,411 @@ +/** + * Integration coverage for the three real LoCoMo QAs that the + * `locomo_benchmark/baseline` cloud baseline run got wrong before fix + * #1 landed. Each case exercises the Read/Bash entry points of + * `processPreToolUse` against a workspace snapshot that mirrors the + * real baseline workspace at the time of the regression: + * + * - `memory` table: empty (summaries have been dropped) + * - `sessions` table: 272 rows, one per LoCoMo session file + * + * The fix (commit 4271baf) taught `buildVirtualIndexContent` and the + * /index.md fallback in `readVirtualPathContents` to merge session rows + * alongside summary rows. Without that fix the synthesized index + * reported "0 sessions:" in this workspace and agents concluded memory + * was empty. These tests fail loudly if the regression returns. + */ + +import { describe, expect, it, vi } from "vitest"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { processPreToolUse, writeReadCacheFile } from "../../src/hooks/pre-tool-use.js"; +import { + buildVirtualIndexContent, + readVirtualPathContents, +} from "../../src/hooks/virtual-table-query.js"; + +// ── Fixture: 272 session rows matching the real `locomo_benchmark/baseline` +// workspace shape — `/sessions/conv__session_.json` — spanning +// conv 0..9 with session counts matching the LoCoMo dataset. 
+const SESSION_COUNTS_PER_CONV: Record = { + 0: 35, 1: 34, 2: 28, 3: 25, 4: 26, 5: 27, 6: 23, 7: 27, 8: 26, 9: 21, +}; + +function makeSessionRows(): Array<{ path: string; description: string }> { + const rows: Array<{ path: string; description: string }> = []; + for (const [conv, count] of Object.entries(SESSION_COUNTS_PER_CONV)) { + for (let s = 1; s <= count; s++) { + rows.push({ + path: `/sessions/conv_${conv}_session_${s}.json`, + description: `LoCoMo conv ${conv} session ${s}`, + }); + } + } + return rows; +} + +const SESSION_ROWS = makeSessionRows(); + +// Sanity-check the fixture shape so a bad edit fails here, not deep in a test. +if (SESSION_ROWS.length !== 272) { + throw new Error(`fixture should model 272 rows, got ${SESSION_ROWS.length}`); +} + +// ── Real QAs from `results/baseline_cloud/scored_baseline_cloud.jsonl` +// that baseline-local got right and baseline-cloud got wrong before the +// fix. Each row is verbatim from the scored JSONL except `session_file` +// which records the session we'd expect Claude to land on. 
+const REAL_QAS = [ + { + name: "qa_3: Caroline's research (fix #2 smoke — real run did Read x3)", + question: "What did Caroline research?", + gold_answer: "Adoption agencies", + expected_session_file: "/sessions/conv_0_session_1.json", + }, + { + name: "qa_6: Melanie's camping plans", + question: "When is Melanie planning on going camping?", + gold_answer: "June 2023", + expected_session_file: "/sessions/conv_0_session_2.json", + }, + { + name: "qa_25: Caroline's LGBTQ conference", + question: "When did Caroline go to the LGBTQ conference?", + gold_answer: "10 July 2023", + expected_session_file: "/sessions/conv_0_session_7.json", + }, + { + name: "qa_29: Melanie's pottery workshop", + question: "When did Melanie go to the pottery workshop?", + gold_answer: "The Friday before 15 July 2023", + expected_session_file: "/sessions/conv_0_session_7.json", + }, + { + name: "qa_46: Melanie as an ally", + question: "Would Melanie be considered an ally to the transgender community?", + gold_answer: "Yes, she is supportive", + expected_session_file: "/sessions/conv_0_session_10.json", + }, +] as const; + +const BASE_CONFIG = { + token: "test-token", + apiUrl: "https://api.test", + orgId: "locomo_benchmark", + workspaceId: "baseline", +}; + +/** Simulates the real baseline workspace: memory empty, sessions populated. */ +function makeBaselineWorkspaceApi(sessionRows = SESSION_ROWS) { + return { + query: vi.fn(async (sql: string) => { + // Memory-table queries return 0 rows (memory table dropped). + if (/FROM\s+"memory"/i.test(sql)) return []; + // Sessions-table fallback query for the virtual /index.md: + if (/FROM\s+"sessions".*\/sessions\/%/i.test(sql)) return sessionRows; + // Union query for exact-path reads of /index.md resolves to nothing — + // forces the fallback branch that builds the synthetic index. 
+ if (/UNION ALL/i.test(sql)) return []; + return []; + }), + } as any; +} + +describe("baseline_cloud 3-QA regression: sessions-only workspace", () => { + it("pure builder renders a real 272-row index without the old '0 sessions:' bug", () => { + const content = buildVirtualIndexContent([], SESSION_ROWS); + + expect(content).toContain("272 entries (0 summaries, 272 sessions):"); + expect(content).toContain("## Sessions"); + expect(content).not.toContain("## Summaries"); + // Bug guard: the old output had a lone "${n} sessions:" header with + // n taken from summary rows only. In this workspace that would be 0. + expect(content).not.toMatch(/^0 sessions:$/m); + expect(content).not.toContain("\n0 sessions:\n"); + + // Every real session path from the fixture must appear in the index. + for (const row of SESSION_ROWS) { + expect(content).toContain(row.path); + } + }); + + it("readVirtualPathContents fallback pulls sessions into /index.md for the baseline workspace", async () => { + const api = makeBaselineWorkspaceApi(); + const result = await readVirtualPathContents(api, "memory", "sessions", ["/index.md"]); + const indexContent = result.get("/index.md") ?? ""; + + expect(indexContent).toContain("272 entries (0 summaries, 272 sessions):"); + // Must land on the three sessions that carry answers for our 3 real QAs. 
+ for (const qa of REAL_QAS) { + expect(indexContent).toContain(qa.expected_session_file); + } + }); + + for (const qa of REAL_QAS) { + describe(qa.name, () => { + it("Read /home/.deeplake/memory/index.md intercept returns file_path (Read-tool shape) pointing to the real session listing", async () => { + const api = makeBaselineWorkspaceApi(); + const capturedReadFiles: Array<{ sessionId: string; virtualPath: string; content: string; returnedPath: string }> = []; + + const decision = await processPreToolUse( + { + session_id: `s-${qa.expected_session_file}`, + tool_name: "Read", + tool_input: { file_path: "~/.deeplake/memory/index.md" }, + tool_use_id: "tu-read-index", + }, + { + config: BASE_CONFIG, + createApi: vi.fn(() => api), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readCachedIndexContentFn: () => null, + writeCachedIndexContentFn: () => undefined, + writeReadCacheFileFn: ((sessionId: string, virtualPath: string, content: string) => { + const returnedPath = `/tmp/baseline-cloud-3qa-test-${sessionId.replace(/[^a-zA-Z0-9._-]/g, "_")}${virtualPath}`; + capturedReadFiles.push({ sessionId, virtualPath, content, returnedPath }); + return returnedPath; + }) as any, + }, + ); + + // Regression guard for bug #2: Read intercept MUST return a decision + // that causes main() to emit `updatedInput: {file_path}`. Today that + // means the decision carries `file_path`. If this asserts "undefined", + // Claude Code's Read tool will error with "path must be of type string". + expect(decision).not.toBeNull(); + expect(decision?.file_path).toBeDefined(); + expect(typeof decision?.file_path).toBe("string"); + + // Content must be materialized once, with the real index shape. + expect(capturedReadFiles).toHaveLength(1); + const materialized = capturedReadFiles[0]; + expect(materialized?.virtualPath).toBe("/index.md"); + expect(decision?.file_path).toBe(materialized?.returnedPath); + + const body = materialized?.content ?? 
""; + expect(body).toContain("# Memory Index"); + expect(body).toContain("272 entries (0 summaries, 272 sessions):"); + expect(body).toContain(qa.expected_session_file); + // Fix #1 regression guard (still important after fix #2): the old + // synthesized index reported sessions from the memory table only. + expect(body).not.toMatch(/\b0 sessions:/); + expect(body).not.toMatch(/\b1 sessions:/); + }); + + it("Bash cat index.md intercept returns the same listing via {command} (bash shape preserved)", async () => { + const api = makeBaselineWorkspaceApi(); + + const decision = await processPreToolUse( + { + session_id: `s-bash-${qa.expected_session_file}`, + tool_name: "Bash", + tool_input: { command: "cat ~/.deeplake/memory/index.md" }, + tool_use_id: "tu-cat-index", + }, + { + config: BASE_CONFIG, + createApi: vi.fn(() => api), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readCachedIndexContentFn: () => null, + writeCachedIndexContentFn: () => undefined, + }, + ); + + expect(decision).not.toBeNull(); + // Bash intercepts keep the historical {command, description} shape — + // Claude Code's Bash tool reads `command`. The content is inlined as + // an `echo "..."` payload so the virtual shell isn't needed here. + expect(decision?.file_path).toBeUndefined(); + const body = decision?.command ?? ""; + expect(body).toContain("272 entries (0 summaries, 272 sessions):"); + expect(body).toContain(qa.expected_session_file); + }); + }); + } + + // ── Regression coverage anchored in a real benchmark run ───────────── + // + // In `baseline_cloud_9qa_read_candidates_fix2` (2026-04-20), haiku chose + // to call the Read tool directly against session files — not just + // /index.md. Specifically, qa_3 did three Read calls including + // Read /home/.deeplake/memory/sessions/conv_0_session_1.json and + // Read /home/.deeplake/memory/sessions/conv_0_session_2.json, and all + // three succeeded (zero "path must be of type string" errors) after + // fix #2 landed. 
The previous run on the same workspace without the fix + // produced that error on every memory-path Read call. + // + // This test drives the same session-file Read through processPreToolUse + // and asserts the decision shape matches what Claude Code's Read tool + // expects — i.e. `updatedInput: {file_path}`, not `{command}`. + + it("Read /sessions/ intercept returns file_path pointing to the session content (qa_3 real-run path)", async () => { + const sessionJson = JSON.stringify({ + conversation_id: 0, + session_number: 1, + date_time: "8 May, 2023", + speakers: { speaker_a: "Caroline", speaker_b: "Melanie" }, + turns: [ + { speaker: "Caroline", dia_id: "D1:1", text: "Hey Mel! Good to see you!" }, + ], + }); + + const api = { + query: vi.fn(async (sql: string) => { + // Exact-path read hits the sessions table. + if (/FROM\s+"sessions"/i.test(sql) && /conv_0_session_1\.json/.test(sql)) { + return [{ path: "/sessions/conv_0_session_1.json", content: sessionJson, source_order: 1 }]; + } + if (/FROM\s+"memory"/i.test(sql)) return []; + return []; + }), + } as any; + const capturedReadFiles: Array<{ sessionId: string; virtualPath: string; content: string }> = []; + + const decision = await processPreToolUse( + { + session_id: "s-qa3-session-read", + tool_name: "Read", + tool_input: { file_path: "~/.deeplake/memory/sessions/conv_0_session_1.json" }, + tool_use_id: "tu-read-session-1", + }, + { + config: BASE_CONFIG, + createApi: vi.fn(() => api), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readCachedIndexContentFn: () => null, + writeCachedIndexContentFn: () => undefined, + writeReadCacheFileFn: ((sessionId: string, virtualPath: string, content: string) => { + capturedReadFiles.push({ sessionId, virtualPath, content }); + return `/tmp/test-${sessionId}${virtualPath}`; + }) as any, + }, + ); + + // Read-tool shape: decision must carry file_path, not just command. 
+ expect(decision).not.toBeNull(); + expect(decision?.file_path).toBe("/tmp/test-s-qa3-session-read/sessions/conv_0_session_1.json"); + + // Content materialized exactly once, at the right virtual path, with + // the real session payload Claude needs to answer qa_3. + expect(capturedReadFiles).toHaveLength(1); + expect(capturedReadFiles[0]?.virtualPath).toBe("/sessions/conv_0_session_1.json"); + expect(capturedReadFiles[0]?.content).toContain("Caroline"); + expect(capturedReadFiles[0]?.content).toContain("8 May, 2023"); + }); + + // ── writeReadCacheFile security guard ───────────────────────────────────── + // + // Claude Code's Read intercept materializes fetched content into + // ~/.deeplake/query-cache//read/. DB-derived + // virtualPaths are user-controlled (anyone with write access to the + // `sessions` / `memory` tables controls them), so `..` segments must not + // be allowed to escape the per-session cache dir. The PR #63 bot review + // flagged this. + + describe("writeReadCacheFile path-traversal guard", () => { + it("writes a well-formed virtualPath inside the per-session cache root", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "writeReadCache-ok-")); + try { + const abs = writeReadCacheFile("sess-1", "/sessions/conv_0_session_1.json", "hello", { cacheRoot }); + expect(abs).toBe(join(cacheRoot, "sess-1", "read", "sessions", "conv_0_session_1.json")); + expect(existsSync(abs)).toBe(true); + expect(readFileSync(abs, "utf-8")).toBe("hello"); + } finally { + rmSync(cacheRoot, { recursive: true, force: true }); + } + }); + + it("refuses a virtualPath that escapes the cache root via ../ segments", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "writeReadCache-trav-")); + try { + expect(() => + writeReadCacheFile("sess-2", "/sessions/../../../etc/passwd", "pwned", { cacheRoot }) + ).toThrow(/path escapes cache root/); + // Guard must fire BEFORE any write lands anywhere under cacheRoot. 
+ expect(existsSync(join(cacheRoot, "sess-2", "read", "sessions"))).toBe(false); + expect(existsSync(join(cacheRoot, "etc"))).toBe(false); + } finally { + rmSync(cacheRoot, { recursive: true, force: true }); + } + }); + + it("refuses traversal that lands outside the cache root entirely", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "writeReadCache-out-")); + try { + // Resolves to something like /tmp/writeReadCache-out-XXX/sess-3/read/../../../../../../etc/shadow + // → /etc/shadow — fully outside cacheRoot. + expect(() => + writeReadCacheFile("sess-3", "/../../../../../../etc/shadow", "x", { cacheRoot }) + ).toThrow(/path escapes cache root/); + } finally { + rmSync(cacheRoot, { recursive: true, force: true }); + } + }); + + it("accepts a path that normalizes back inside the cache root", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "writeReadCache-norm-")); + try { + // `/sessions/foo/../bar.json` → `/sessions/bar.json`, still inside. + const abs = writeReadCacheFile("sess-4", "/sessions/foo/../bar.json", "ok", { cacheRoot }); + expect(abs).toBe(join(cacheRoot, "sess-4", "read", "sessions", "bar.json")); + expect(readFileSync(abs, "utf-8")).toBe("ok"); + } finally { + rmSync(cacheRoot, { recursive: true, force: true }); + } + }); + }); + + // ── /index.md fallback lives in virtual-table-query.ts only ─────────────── + // + // An earlier draft of fix #1 duplicated the synthesized-index builder + // inside pre-tool-use.ts. The bot review flagged that duplicate as + // unreachable + using the old single-table SQL ("N sessions:" header, + // missing `## Sessions`). The duplicate has since been removed; this + // test locks in that removal — `processPreToolUse` must use the dual- + // table builder and never synthesize its own broken fallback. 
+ + it("index.md intercept never falls back to the single-table inline builder", async () => { + // readVirtualPathContentFn returns non-null for /index.md (fix #1 + // guarantee), so the old inline fallback is now unreachable. If + // somebody re-introduces it, this test fails because the bad string + // "${n} sessions:" would appear in the output instead of the dual- + // table "${total} entries (${s} summaries, ${n} sessions):" header. + const api = { query: vi.fn(async () => []) } as any; + const readVirtualPathContentFn = vi.fn(async () => "# Memory Index\n\n272 entries (0 summaries, 272 sessions):\n"); + let materialized: string | undefined; + + const decision = await processPreToolUse( + { + session_id: "s-index-fallback", + tool_name: "Read", + tool_input: { file_path: "~/.deeplake/memory/index.md" }, + tool_use_id: "tu-fallback", + }, + { + config: BASE_CONFIG, + createApi: vi.fn(() => api), + readVirtualPathContentFn: readVirtualPathContentFn as any, + readCachedIndexContentFn: () => null, + writeCachedIndexContentFn: () => undefined, + writeReadCacheFileFn: ((_sid: string, _vp: string, content: string) => { + materialized = content; + return "/tmp/fake-index-path"; + }) as any, + }, + ); + + expect(decision).not.toBeNull(); + expect(materialized).toBeDefined(); + // The dual-table builder's content was materialized, not the + // single-table "N sessions:" fallback. + expect(materialized).toContain("272 entries (0 summaries, 272 sessions):"); + expect(materialized).not.toMatch(/\n\d+ sessions:\n/); + // Production code must not issue its own fallback SELECT against + // memory for /index.md — it delegates entirely to readVirtualPath. 
+ const summariesOnlyFallback = api.query.mock.calls.find((call: any[]) => + String(call[0] || "").includes(`FROM "memory" WHERE path LIKE '/summaries/%'`) + ); + expect(summariesOnlyFallback).toBeUndefined(); + }); +}); diff --git a/claude-code/tests/pre-tool-use-branches.test.ts b/claude-code/tests/pre-tool-use-branches.test.ts new file mode 100644 index 0000000..cb3de12 --- /dev/null +++ b/claude-code/tests/pre-tool-use-branches.test.ts @@ -0,0 +1,640 @@ +/** + * Branch-coverage suite for `src/hooks/pre-tool-use.ts`. + * + * The PR already has an end-to-end regression suite in + * `pre-tool-use-baseline-cloud.test.ts`, but that file anchors to real + * LoCoMo QAs and only exercises the `/index.md` and `/sessions/*` Read + * paths plus one Bash `cat`. This file fills in the remaining branches + * that the hook supports — Glob, Grep, Bash ls/head/tail/wc/find, the + * unsafe-command guidance path, and the no-config fallback — so the + * whole file can stay above the 90% coverage bar. + */ + +import { describe, expect, it, vi } from "vitest"; +import { homedir } from "node:os"; +import { join } from "node:path"; +import { + buildAllowDecision, + buildReadDecision, + extractGrepParams, + getShellCommand, + isSafe, + processPreToolUse, + rewritePaths, + touchesMemory, +} from "../../src/hooks/pre-tool-use.js"; + +// MEMORY_PATH is `${homedir()}/.deeplake/memory` — differs between CI +// (`/home/runner/...`) and dev (`/home//...`), so any test that +// asserts on the literal form has to build it from homedir() too. 
+const MEM_ABS = join(homedir(), ".deeplake", "memory"); + +const BASE_CONFIG = { + token: "t", + apiUrl: "http://example", + orgId: "org", + orgName: "org", + userName: "u", + workspaceId: "default", + apiOrigin: "http://example", +}; + +function makeApi() { + return { query: vi.fn(async () => []) } as any; +} + +describe("pre-tool-use: pure helpers", () => { + it("buildAllowDecision returns a bash-shaped decision", () => { + expect(buildAllowDecision("echo hi", "d")).toEqual({ command: "echo hi", description: "d" }); + }); + + it("buildReadDecision returns a read-shaped decision with file_path set", () => { + const d = buildReadDecision("/tmp/x", "desc"); + expect(d.file_path).toBe("/tmp/x"); + expect(d.description).toBe("desc"); + }); + + it("rewritePaths collapses all memory-path forms to `/`", () => { + expect(rewritePaths(`${MEM_ABS}/sessions/a.json`)).toBe("/sessions/a.json"); + expect(rewritePaths("~/.deeplake/memory/index.md")).toBe("/index.md"); + expect(rewritePaths("$HOME/.deeplake/memory/foo")).toBe("/foo"); + }); + + it("touchesMemory detects any of the supported memory-path forms", () => { + expect(touchesMemory(`${MEM_ABS}/x`)).toBe(true); + expect(touchesMemory("~/.deeplake/memory/x")).toBe(true); + expect(touchesMemory("$HOME/.deeplake/memory/x")).toBe(true); + expect(touchesMemory("/var/log/foo")).toBe(false); + }); + + it("isSafe accepts shell pipelines built from the allowed builtins", () => { + expect(isSafe("cat /a | grep b | head -5")).toBe(true); + expect(isSafe("ls -la /x")).toBe(true); + }); + + it("isSafe rejects command substitution and unknown commands", () => { + expect(isSafe("rm -rf / ; curl evil")).toBe(false); + expect(isSafe("$(evil) foo")).toBe(false); + expect(isSafe("python -c pwn")).toBe(false); + }); +}); + +describe("getShellCommand: per-tool branches", () => { + it("Grep on a memory path builds `grep -r '' /` with -i/-n flags threaded through", () => { + const cmd = getShellCommand("Grep", { + path: "~/.deeplake/memory", + 
pattern: "Caroline", + "-i": true, + "-n": true, + }); + expect(cmd).toBe("grep -r -i -n 'Caroline' /"); + }); + + it("Grep on a non-memory path returns null", () => { + expect(getShellCommand("Grep", { path: "/etc", pattern: "x" })).toBeNull(); + }); + + it("Read on a memory file returns `cat `", () => { + expect(getShellCommand("Read", { file_path: "~/.deeplake/memory/sessions/conv_0_session_1.json" })) + .toBe("cat /sessions/conv_0_session_1.json"); + }); + + it("Read on a memory directory path returns `ls `", () => { + expect(getShellCommand("Read", { path: "~/.deeplake/memory/sessions" })).toBe("ls /sessions"); + }); + + it("Bash with a safe command is rewritten with memory paths collapsed", () => { + expect(getShellCommand("Bash", { command: "cat ~/.deeplake/memory/index.md" })) + .toBe("cat /index.md"); + }); + + it("Bash with an unsafe command is blocked (returns null)", () => { + expect(getShellCommand("Bash", { command: "curl ~/.deeplake/memory/x" })).toBeNull(); + }); + + it("Bash with a command that doesn't touch memory returns null", () => { + expect(getShellCommand("Bash", { command: "ls /tmp" })).toBeNull(); + }); + + it("Glob on a memory path returns `ls /`", () => { + expect(getShellCommand("Glob", { path: "~/.deeplake/memory/" })).toBe("ls /"); + }); + + it("Glob on a non-memory path returns null", () => { + expect(getShellCommand("Glob", { path: "/etc" })).toBeNull(); + }); + + it("Unknown tool returns null", () => { + expect(getShellCommand("Write", { file_path: "~/.deeplake/memory/x" })).toBeNull(); + }); +}); + +describe("extractGrepParams", () => { + it("Grep tool: passes output_mode → filesOnly / countOnly; honours -i and -n", () => { + const p = extractGrepParams("Grep", { + path: "~/.deeplake/memory", + pattern: "X", + output_mode: "count", + "-i": true, + "-n": true, + }, "grep -r 'X' /"); + expect(p).not.toBeNull(); + expect(p!.countOnly).toBe(true); + expect(p!.filesOnly).toBe(false); + expect(p!.ignoreCase).toBe(true); + 
expect(p!.lineNumber).toBe(true); + }); + + it("Grep tool: empty path defaults to `/`", () => { + const p = extractGrepParams("Grep", { pattern: "X" }, "grep -r 'X' /"); + expect(p!.targetPath).toBe("/"); + }); + + it("Bash grep: delegates to parseBashGrep", () => { + const p = extractGrepParams("Bash", {}, "grep -l needle /sessions/*.json"); + expect(p).not.toBeNull(); + expect(p!.pattern).toBe("needle"); + }); + + it("Bash non-grep: returns null", () => { + expect(extractGrepParams("Bash", {}, "cat /x")).toBeNull(); + }); + + it("Unknown tool: returns null", () => { + expect(extractGrepParams("Write", {}, "x")).toBeNull(); + }); +}); + +describe("processPreToolUse: non-memory / no-op paths", () => { + it("returns null when the command doesn't touch memory and there's no shellCmd", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ls /tmp" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any }, + ); + expect(d).toBeNull(); + }); + + it("returns [RETRY REQUIRED] guidance when an unsupported command mentions the memory path", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "curl ~/.deeplake/memory/x" }, tool_use_id: "t" }, + { config: BASE_CONFIG as any, logFn: vi.fn() }, + ); + expect(d?.command).toContain("[RETRY REQUIRED]"); + expect(d?.command).toContain("bash builtins"); + }); + + it("falls back to the shell bundle when no config is loaded", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/index.md" }, tool_use_id: "t" }, + { config: null as any, shellBundle: "/SHELL" }, + ); + expect(d?.command).toContain(`node "/SHELL" -c`); + expect(d?.description).toContain("[DeepLake shell]"); + }); +}); + +describe("processPreToolUse: Glob / ls branches", () => { + it("Glob on memory routes through listVirtualPathRows and renders a directory listing", 
async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/sessions/conv_0_session_1.json", size_bytes: 100 }, + { path: "/sessions/conv_0_session_2.json", size_bytes: 200 }, + { path: "/summaries/alice/s1.md", size_bytes: 50 }, + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Glob", tool_input: { path: "~/.deeplake/memory/" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("sessions/"); + expect(d?.command).toContain("summaries/"); + expect(d?.description).toContain("[DeepLake direct] ls /"); + }); + + it("Bash `ls -la ` returns a long-format listing", async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/summaries/alice/s1.md", size_bytes: 42 }, + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ls -la ~/.deeplake/memory/summaries" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("drwxr-xr-x"); + expect(d?.command).toContain("alice/"); + }); + + it("ls on an empty directory reports `(empty directory)` — not a bogus path listing", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ls ~/.deeplake/memory/nope" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn: vi.fn(async () => []) as any, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("(empty directory)"); + }); +}); + +describe("processPreToolUse: Bash read-shape intercepts", () => { + const makeApiWith = (content: string | null) => ({ + api: 
makeApi(), + readVirtualPathContentFn: vi.fn(async () => content) as any, + }); + + it("`cat ` returns the raw content", async () => { + const { api, readVirtualPathContentFn } = makeApiWith("line1\nline2\nline3"); + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => api), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("line1"); + expect(d?.description).toContain("[DeepLake direct] cat"); + }); + + it("`head -N ` limits to the first N lines", async () => { + const { api, readVirtualPathContentFn } = makeApiWith("l1\nl2\nl3\nl4"); + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "head -2 ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => api), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("l1\\nl2"); + expect(d?.command).not.toContain("l3"); + }); + + it("`tail -N ` limits to the last N lines", async () => { + const { api, readVirtualPathContentFn } = makeApiWith("l1\nl2\nl3\nl4"); + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "tail -2 ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => api), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("l3\\nl4"); + expect(d?.command).not.toContain("l1"); + }); + + it("`wc -l ` returns the line count with the virtual path", async () => { + const { api, readVirtualPathContentFn } = makeApiWith("a\nb\nc"); + const d = await processPreToolUse( + { session_id: "s", tool_name: 
"Bash", tool_input: { command: "wc -l ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => api), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("3 /sessions/a.json"); + expect(d?.description).toContain("wc -l"); + }); +}); + +describe("processPreToolUse: find / grep / fallback", () => { + it("Bash `find -name ''` lists matching paths", async () => { + const findVirtualPathsFn = vi.fn(async () => [ + "/sessions/conv_0_session_1.json", + "/sessions/conv_0_session_2.json", + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "find ~/.deeplake/memory/sessions -name '*.json'" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + findVirtualPathsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("/sessions/conv_0_session_1.json"); + expect(d?.description).toContain("[DeepLake direct] find"); + }); + + it("Bash `find … | wc -l` returns the count", async () => { + const findVirtualPathsFn = vi.fn(async () => ["/a.json", "/b.json", "/c.json"]) as any; + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "find ~/.deeplake/memory/sessions -name '*.json' | wc -l" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + findVirtualPathsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain('"3"'); + }); + + it("Grep tool: falls through to handleGrepDirect and returns the matches", async () => { + const handleGrepDirectFn = vi.fn(async () => "/sessions/a.json:match line") as any; + const d = await processPreToolUse( + { + session_id: "s", + tool_name: "Grep", + tool_input: { path: "~/.deeplake/memory", pattern: "match", output_mode: 
"content" }, + tool_use_id: "t", + }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + handleGrepDirectFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("match line"); + }); + + it("throws in direct-read path → falls back to the shell bundle", async () => { + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readVirtualPathContentFn: vi.fn(async () => { throw new Error("boom"); }) as any, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + shellBundle: "/SHELL", + logFn: vi.fn(), + }, + ); + expect(d?.command).toContain('node "/SHELL" -c'); + }); +}); + +describe("processPreToolUse: index cache short-circuit", () => { + // `readVirtualPathContentsWithCache` is an inline callback the hook + // passes to `executeCompiledBashCommand` so the compiled-segments path + // can reuse the already-fetched /index.md content without hitting SQL + // twice. The happy path is only exercised when the compiler actually + // invokes the callback — these tests simulate exactly that. + + it("returns the cached /index.md immediately without calling readVirtualPathContents", async () => { + const readVirtualPathContentsFn = vi.fn(async (_api, _m, _s, paths: string[]) => + new Map(paths.map(p => [p, `FETCHED:${p}`])), + ) as any; + const readCachedIndexContentFn = vi.fn(() => "CACHED INDEX"); + const writeCachedIndexContentFn = vi.fn(); + + const executeCompiledBashCommandFn = vi.fn(async (_api, _memory, _sessions, _cmd, deps) => { + // Mimic what the real compiler does when it needs /index.md content. 
+ const fetched = await deps.readVirtualPathContentsFn(_api, _memory, _sessions, ["/index.md", "/sessions/x.json"]); + return `idx=${fetched.get("/index.md")}\nx=${fetched.get("/sessions/x.json")}`; + }) as any; + + const d = await processPreToolUse( + { session_id: "s1", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/index.md && cat ~/.deeplake/memory/sessions/x.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readCachedIndexContentFn, + writeCachedIndexContentFn, + readVirtualPathContentsFn, + executeCompiledBashCommandFn, + }, + ); + + expect(d?.command).toContain("idx=CACHED INDEX"); + expect(d?.command).toContain("x=FETCHED:/sessions/x.json"); + // /index.md came from the per-session cache; only the /sessions/x.json + // path went to the API. + expect(readCachedIndexContentFn).toHaveBeenCalledWith("s1"); + expect(readVirtualPathContentsFn).toHaveBeenCalledWith( + expect.anything(), + expect.anything(), + expect.anything(), + ["/sessions/x.json"], + ); + // Cache re-write always fires when /index.md is in the result set — + // idempotent for the hit path (same content in, same content out). + expect(writeCachedIndexContentFn).toHaveBeenCalledWith("s1", "CACHED INDEX"); + }); + + it("writes the freshly-fetched /index.md into the session cache when there's no hit", async () => { + const readVirtualPathContentsFn = vi.fn(async (_api, _m, _s, paths: string[]) => + new Map(paths.map(p => [p, p === "/index.md" ? 
"FRESH INDEX" : null])), + ) as any; + const readCachedIndexContentFn = vi.fn(() => null); + const writeCachedIndexContentFn = vi.fn(); + + const executeCompiledBashCommandFn = vi.fn(async (_api, _m, _s, _cmd, deps) => { + const fetched = await deps.readVirtualPathContentsFn(_api, _m, _s, ["/index.md"]); + return `out=${fetched.get("/index.md")}`; + }) as any; + + const d = await processPreToolUse( + { session_id: "s2", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/index.md" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readCachedIndexContentFn, + writeCachedIndexContentFn, + readVirtualPathContentsFn, + executeCompiledBashCommandFn, + }, + ); + + expect(d?.command).toContain("FRESH INDEX"); + expect(writeCachedIndexContentFn).toHaveBeenCalledWith("s2", "FRESH INDEX"); + }); + + it("Read on the memory root (no extension in basename) routes to the ls directory branch", async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/sessions/conv_0_session_1.json", size_bytes: 100 }, + { path: "/summaries/alice/s1.md" /* no size_bytes → null branch */ }, + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Read", tool_input: { file_path: "~/.deeplake/memory/" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("sessions/"); + expect(d?.command).toContain("summaries/"); + }); + + it("Read on a directory with trailing slashes strips them before listing", async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/sessions/conv_0_session_1.json", size_bytes: 42 }, + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Read", tool_input: { file_path: "~/.deeplake/memory/sessions///" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + 
createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("conv_0_session_1.json"); + }); + + it("`head ` (no explicit -N) defaults to 10 lines", async () => { + const readVirtualPathContentFn = vi.fn(async () => + Array.from({ length: 20 }, (_, i) => `L${i}`).join("\n") + ) as any; + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "head ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("L0"); + expect(d?.command).toContain("L9"); + expect(d?.command).not.toContain("L10"); + }); + + it("`tail ` (no explicit -N) defaults to the last 10 lines", async () => { + const readVirtualPathContentFn = vi.fn(async () => + Array.from({ length: 20 }, (_, i) => `L${i}`).join("\n") + ) as any; + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "tail ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("L19"); + expect(d?.command).toContain("L10"); + expect(d?.command).not.toContain("L9"); + }); + + it("ls -la listing includes both file entries (-rw-) and directory entries (drwx)", async () => { + // A flat file directly under the listed dir → file entry (isDir=false). + // A nested path under a subdir → directory entry (isDir=true). 
+ const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/summaries/top-level.md", size_bytes: 42 }, + { path: "/summaries/alice/s1.md", size_bytes: 100 }, + { path: "/summaries/", size_bytes: 0 }, // empty suffix — skipped by `if (!name) continue` + ]) as any; + + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ls -la ~/.deeplake/memory/summaries" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + // File entry → -rw-r--r-- prefix + expect(d?.command).toContain("-rw-r--r--"); + expect(d?.command).toContain("top-level.md"); + // Directory entry → drwxr-xr-x prefix + expect(d?.command).toContain("drwxr-xr-x"); + expect(d?.command).toContain("alice/"); + }); + + it("cat | head pipeline routes to the head fast-path", async () => { + const readVirtualPathContentFn = vi.fn(async () => + Array.from({ length: 30 }, (_, i) => `L${i}`).join("\n") + ) as any; + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json | head -3" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readVirtualPathContentFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("L0"); + expect(d?.command).toContain("L2"); + expect(d?.command).not.toContain("L3"); + }); + + it("Grep whose handleGrepDirect returns null falls through — no decision from grep path", async () => { + const handleGrepDirectFn = vi.fn(async () => null) as any; + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/summaries/alice/s1.md", size_bytes: 100 }, + ]) as any; + // We send a Read on a directory so after grep-null fall-through the ls + // branch takes over with a real decision — proving the flow continues + // past the null grep 
result instead of erroring. + const d = await processPreToolUse( + { session_id: "s", tool_name: "Read", tool_input: { path: "~/.deeplake/memory/summaries" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + handleGrepDirectFn, + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).toContain("alice/"); + }); + + it("Bash `ls ` without -l uses short-format listing (no permissions prefix)", async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/sessions/conv_0_session_1.json", size_bytes: 100 }, + ]) as any; + const d = await processPreToolUse( + { session_id: "s", tool_name: "Bash", tool_input: { command: "ls ~/.deeplake/memory/sessions" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + listVirtualPathRowsFn, + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d?.command).not.toContain("drwxr-xr-x"); + expect(d?.command).toContain("conv_0_session_1.json"); + }); + + it("handles the no-paths edge case (empty cachePaths passed by the compiler)", async () => { + const readVirtualPathContentsFn = vi.fn(async () => new Map()) as any; + const readCachedIndexContentFn = vi.fn(() => null); + + const executeCompiledBashCommandFn = vi.fn(async (_api, _m, _s, _cmd, deps) => { + const result = await deps.readVirtualPathContentsFn(_api, _m, _s, []); + return `size=${result.size}`; + }) as any; + + const d = await processPreToolUse( + { session_id: "s3", tool_name: "Bash", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json" }, tool_use_id: "t" }, + { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readCachedIndexContentFn, + writeCachedIndexContentFn: vi.fn(), + readVirtualPathContentsFn, + executeCompiledBashCommandFn, + }, + ); + expect(d?.command).toContain("size=0"); + // Didn't touch SQL because paths were empty. 
+ expect(readVirtualPathContentsFn).not.toHaveBeenCalled(); + }); +}); diff --git a/claude-code/tests/pre-tool-use.test.ts b/claude-code/tests/pre-tool-use.test.ts index bbf00ba..f5bb682 100644 --- a/claude-code/tests/pre-tool-use.test.ts +++ b/claude-code/tests/pre-tool-use.test.ts @@ -305,6 +305,15 @@ describe("pre-tool-use: non-Bash tools targeting memory", () => { } }); + it("intercepts Read using path alias for the memory root", () => { + const r = runPreToolUse("Read", { path: "~/.deeplake/memory" }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + expect(r.updatedCommand).toContain("ls /"); + } + }); + it("intercepts Glob targeting memory path", () => { const r = runPreToolUse("Glob", { path: "~/.deeplake/memory/", pattern: "*.md" }); expect(r.empty).toBe(false); diff --git a/claude-code/tests/query-cache.test.ts b/claude-code/tests/query-cache.test.ts new file mode 100644 index 0000000..84f62a9 --- /dev/null +++ b/claude-code/tests/query-cache.test.ts @@ -0,0 +1,68 @@ +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + clearSessionQueryCache, + getSessionQueryCacheDir, + readCachedIndexContent, + writeCachedIndexContent, +} from "../../src/hooks/query-cache.js"; + +describe("query-cache", () => { + const tempRoots: string[] = []; + + afterEach(() => { + for (const root of tempRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } + vi.restoreAllMocks(); + }); + + it("writes and reads cached index content per session", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "hivemind-query-cache-")); + tempRoots.push(cacheRoot); + + writeCachedIndexContent("session-1", "# Memory Index", { cacheRoot }); + + expect(readCachedIndexContent("session-1", { cacheRoot })).toBe("# Memory Index"); + expect(getSessionQueryCacheDir("session-1", { cacheRoot 
})).toBe(join(cacheRoot, "session-1")); + }); + + it("returns null for missing cache files and logs non-ENOENT read and write failures", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "hivemind-query-cache-")); + tempRoots.push(cacheRoot); + const logFn = vi.fn(); + + expect(readCachedIndexContent("missing", { cacheRoot, logFn })).toBeNull(); + expect(logFn).not.toHaveBeenCalled(); + + expect(readCachedIndexContent("broken", { + cacheRoot: "\u0000", + logFn, + })).toBeNull(); + expect(logFn).toHaveBeenCalledWith(expect.stringContaining("read failed")); + + writeCachedIndexContent("blocked", "content", { + cacheRoot: "\u0000", + logFn, + }); + expect(logFn).toHaveBeenCalledWith(expect.stringContaining("write failed")); + }); + + it("clears a session cache directory and swallows removal errors", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "hivemind-query-cache-")); + tempRoots.push(cacheRoot); + writeCachedIndexContent("session-2", "cached", { cacheRoot }); + + clearSessionQueryCache("session-2", { cacheRoot }); + expect(readCachedIndexContent("session-2", { cacheRoot })).toBeNull(); + + const logFn = vi.fn(); + clearSessionQueryCache("session-2", { + cacheRoot: "\u0000", + logFn, + }); + expect(logFn).toHaveBeenCalledWith(expect.stringContaining("clear failed")); + }); +}); diff --git a/claude-code/tests/session-queue.test.ts b/claude-code/tests/session-queue.test.ts new file mode 100644 index 0000000..068f41b --- /dev/null +++ b/claude-code/tests/session-queue.test.ts @@ -0,0 +1,579 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + existsSync, + mkdtempSync, + readFileSync, + renameSync, + rmSync, + utimesSync, + writeFileSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { + appendQueuedSessionRow, + buildQueuedSessionRow, + buildSessionInsertSql, + buildSessionPath, + clearSessionWriteDisabled, + drainSessionQueues, + flushSessionQueue, + isSessionWriteDisabled, + 
isSessionWriteAuthError, + markSessionWriteDisabled, + type QueuedSessionRow, + type SessionQueueApi, + tryAcquireSessionDrainLock, +} from "../../src/hooks/session-queue.js"; + +const tempDirs: string[] = []; + +function makeQueueDir(): string { + const dir = mkdtempSync(join(tmpdir(), "hivemind-session-queue-")); + tempDirs.push(dir); + return dir; +} + +function makeRow(sessionId: string, seq: number, overrides: Partial = {}): QueuedSessionRow { + const sessionPath = buildSessionPath( + { userName: "alice", orgName: "acme", workspaceId: "default" }, + sessionId, + ); + const timestamp = `2026-01-01T00:00:${String(seq % 60).padStart(2, "0")}Z`; + const line = JSON.stringify({ + id: `event-${seq}`, + session_id: sessionId, + hook_event_name: "PostToolUse", + timestamp, + type: "tool_call", + tool_name: "Read", + content: `row-${seq}`, + }); + + return { + ...buildQueuedSessionRow({ + sessionPath, + line, + userName: "alice", + projectName: "repo", + description: "PostToolUse", + agent: "claude_code", + timestamp, + }), + ...overrides, + }; +} + +function makeApi(queryImpl?: (sql: string) => Promise[]>) { + const api: SessionQueueApi & { + query: ReturnType; + ensureSessionsTable: ReturnType; + } = { + query: vi.fn(queryImpl ?? 
(async () => [])), + ensureSessionsTable: vi.fn(async () => undefined), + }; + return api; +} + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +describe("session queue", () => { + it("appends one JSONL line per queued row", () => { + const queueDir = makeQueueDir(); + const row = makeRow("session-append", 1); + + const queuePath = appendQueuedSessionRow(row, queueDir); + const lines = readFileSync(queuePath, "utf-8").trim().split("\n"); + + expect(lines).toHaveLength(1); + expect(JSON.parse(lines[0])).toEqual(row); + }); + + it("builds a multi-row INSERT that preserves JSONB payloads", () => { + const row1 = makeRow("session-sql", 1, { + message: JSON.stringify({ content: "it's", path: "C:\\Users\\alice\\file.ts" }), + }); + const row2 = makeRow("session-sql", 2); + + const sql = buildSessionInsertSql("sessions", [row1, row2]); + + expect(sql.match(/::jsonb/g)).toHaveLength(2); + expect(sql).toContain("it''s"); + expect(sql).toContain('"path":"C:'); + expect(sql).toContain("file.ts"); + expect(sql).toContain("), ("); + }); + + it("wraps malformed messages in a valid JSON object before casting to jsonb", () => { + const row = makeRow("session-sql-fallback", 1, { + message: "{not-json", + }); + + const sql = buildSessionInsertSql("sessions", [row]); + + expect(sql).toContain(`"type":"raw_message"`); + expect(sql).toContain(`"content":"{not-json"`); + expect(sql).toContain("::jsonb"); + }); + + it("rejects empty INSERT batches", () => { + expect(() => buildSessionInsertSql("sessions", [])).toThrow("rows must not be empty"); + }); + + it("returns empty when there is nothing to flush", async () => { + const queueDir = makeQueueDir(); + const api = makeApi(); + + const result = await flushSessionQueue(api, { + sessionId: "session-empty", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "empty", rows: 0, batches: 0 }); + 
expect(api.query).not.toHaveBeenCalled(); + }); + + it("flushes a queue in chunked multi-row INSERT batches", async () => { + const queueDir = makeQueueDir(); + const api = makeApi(); + + for (let i = 0; i < 51; i++) { + appendQueuedSessionRow(makeRow("session-batch", i), queueDir); + } + + const result = await flushSessionQueue(api, { + sessionId: "session-batch", + sessionsTable: "sessions", + queueDir, + maxBatchRows: 50, + drainAll: true, + }); + + expect(result).toEqual({ status: "flushed", rows: 51, batches: 2 }); + expect(api.query).toHaveBeenCalledTimes(2); + expect(api.ensureSessionsTable).not.toHaveBeenCalled(); + expect(existsSync(join(queueDir, "session-batch.jsonl"))).toBe(false); + expect(existsSync(join(queueDir, "session-batch.inflight"))).toBe(false); + }); + + it("retries once after ensuring the sessions table", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-retry", 1), queueDir); + + let attempts = 0; + const api = makeApi(async () => { + attempts += 1; + if (attempts === 1) throw new Error("table sessions does not exist"); + return []; + }); + + const result = await flushSessionQueue(api, { + sessionId: "session-retry", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "flushed", rows: 1, batches: 1 }); + expect(api.ensureSessionsTable).toHaveBeenCalledWith("sessions"); + expect(api.query).toHaveBeenCalledTimes(2); + }); + + it("removes empty queue files without issuing inserts", async () => { + const queueDir = makeQueueDir(); + writeFileSync(join(queueDir, "session-empty-file.jsonl"), ""); + + const api = makeApi(); + const result = await flushSessionQueue(api, { + sessionId: "session-empty-file", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "flushed", rows: 0, batches: 0 }); + expect(api.query).not.toHaveBeenCalled(); + expect(existsSync(join(queueDir, "session-empty-file.inflight"))).toBe(false); + }); + + it("rethrows 
non-auth ensureSessionsTable failures", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-ensure-error", 1), queueDir); + + const api = makeApi(async () => { + throw new Error("table sessions does not exist"); + }); + api.ensureSessionsTable.mockRejectedValueOnce(new Error("dial tcp reset")); + + await expect(flushSessionQueue(api, { + sessionId: "session-ensure-error", + sessionsTable: "sessions", + queueDir, + })).rejects.toThrow("dial tcp reset"); + }); + + it("rethrows non-auth retry failures after ensureSessionsTable succeeds", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-retry-error", 1), queueDir); + + let attempts = 0; + const api = makeApi(async () => { + attempts += 1; + if (attempts === 1) throw new Error("table sessions does not exist"); + throw new Error("network blew up"); + }); + + await expect(flushSessionQueue(api, { + sessionId: "session-retry-error", + sessionsTable: "sessions", + queueDir, + })).rejects.toThrow("network blew up"); + expect(api.ensureSessionsTable).toHaveBeenCalledWith("sessions"); + }); + + it("re-queues failed inflight rows back into the queue", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-fail", 1), queueDir); + + const api = makeApi(async () => { + appendQueuedSessionRow(makeRow("session-fail", 2), queueDir); + throw new Error("network blew up"); + }); + + await expect(flushSessionQueue(api, { + sessionId: "session-fail", + sessionsTable: "sessions", + queueDir, + })).rejects.toThrow("network blew up"); + + const lines = readFileSync(join(queueDir, "session-fail.jsonl"), "utf-8").trim().split("\n"); + expect(lines).toHaveLength(2); + const messages = lines.map((line) => JSON.parse(line).message); + expect(messages.some((m: string) => m.includes("row-1"))).toBe(true); + expect(messages.some((m: string) => m.includes("row-2"))).toBe(true); + expect(existsSync(join(queueDir, 
"session-fail.inflight"))).toBe(false); + }); + + it("returns busy while another flusher owns the inflight file", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-busy", 1), queueDir); + renameSync( + join(queueDir, "session-busy.jsonl"), + join(queueDir, "session-busy.inflight"), + ); + appendQueuedSessionRow(makeRow("session-busy", 2), queueDir); + + const api = makeApi(); + const result = await flushSessionQueue(api, { + sessionId: "session-busy", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "busy", rows: 0, batches: 0 }); + expect(api.query).not.toHaveBeenCalled(); + }); + + it("waits for inflight ownership to clear before flushing queued rows", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-wait", 1), queueDir); + renameSync( + join(queueDir, "session-wait.jsonl"), + join(queueDir, "session-wait.inflight"), + ); + appendQueuedSessionRow(makeRow("session-wait", 2), queueDir); + + setTimeout(() => { + rmSync(join(queueDir, "session-wait.inflight"), { force: true }); + }, 50); + + const api = makeApi(); + const result = await flushSessionQueue(api, { + sessionId: "session-wait", + sessionsTable: "sessions", + queueDir, + waitIfBusyMs: 250, + }); + + expect(result).toEqual({ status: "flushed", rows: 1, batches: 1 }); + expect(api.query).toHaveBeenCalledTimes(1); + expect((api.query.mock.calls[0]?.[0] as string) ?? 
"").toContain("row-2"); + }); + + it("drains stale inflight files on session start replay", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-stale", 1), queueDir); + renameSync( + join(queueDir, "session-stale.jsonl"), + join(queueDir, "session-stale.inflight"), + ); + utimesSync(join(queueDir, "session-stale.inflight"), 0, 0); + + const api = makeApi(); + const result = await drainSessionQueues(api, { + sessionsTable: "sessions", + queueDir, + staleInflightMs: 1, + }); + + expect(result).toEqual({ + queuedSessions: 1, + flushedSessions: 1, + rows: 1, + batches: 1, + }); + expect(api.query).toHaveBeenCalledTimes(1); + expect(existsSync(join(queueDir, "session-stale.inflight"))).toBe(false); + }); + + it("drains queued .jsonl sessions on session start replay", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-drain-queued", 1), queueDir); + + const api = makeApi(); + const result = await drainSessionQueues(api, { + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ + queuedSessions: 1, + flushedSessions: 1, + rows: 1, + batches: 1, + }); + }); + + it("counts queued sessions even when local auth-disable prevents flushing", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-drain-disabled", 1), queueDir); + markSessionWriteDisabled("sessions", "403 Forbidden", queueDir); + + const result = await drainSessionQueues(makeApi(), { + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ + queuedSessions: 1, + flushedSessions: 0, + rows: 0, + batches: 0, + }); + }); + + it("marks session writes disabled on auth failures and preserves the queue", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-auth", 1), queueDir); + + const api = makeApi(async () => { + throw new Error("Query failed: 403: Forbidden"); + }); + + const result = await flushSessionQueue(api, { + sessionId: 
"session-auth", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "disabled", rows: 0, batches: 0 }); + expect(api.ensureSessionsTable).not.toHaveBeenCalled(); + expect(isSessionWriteDisabled("sessions", queueDir)).toBe(true); + expect(existsSync(join(queueDir, "session-auth.jsonl"))).toBe(true); + }); + + it("skips flush attempts while session writes are locally disabled", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-skip", 1), queueDir); + + const api = makeApi(); + const first = await flushSessionQueue(api, { + sessionId: "session-skip", + sessionsTable: "sessions", + queueDir, + }); + expect(first.status).toBe("flushed"); + + appendQueuedSessionRow(makeRow("session-skip", 2), queueDir); + const failingApi = makeApi(async () => { + throw new Error("403 Forbidden"); + }); + const disabled = await flushSessionQueue(failingApi, { + sessionId: "session-skip", + sessionsTable: "sessions", + queueDir, + }); + expect(disabled.status).toBe("disabled"); + + const skipped = await flushSessionQueue(api, { + sessionId: "session-skip", + sessionsTable: "sessions", + queueDir, + }); + expect(skipped).toEqual({ status: "disabled", rows: 0, batches: 0 }); + expect(api.query).toHaveBeenCalledTimes(1); + + clearSessionWriteDisabled("sessions", queueDir); + }); + + it("returns empty when writes are disabled but no queue files remain", async () => { + const queueDir = makeQueueDir(); + markSessionWriteDisabled("sessions", "403 Forbidden", queueDir); + + const result = await flushSessionQueue(makeApi(), { + sessionId: "session-disabled-empty", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "empty", rows: 0, batches: 0 }); + }); + + it("recovers stale inflight files during a direct flush when allowed", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-recover", 1), queueDir); + renameSync( + join(queueDir, 
"session-recover.jsonl"), + join(queueDir, "session-recover.inflight"), + ); + utimesSync(join(queueDir, "session-recover.inflight"), 0, 0); + + const api = makeApi(); + const result = await flushSessionQueue(api, { + sessionId: "session-recover", + sessionsTable: "sessions", + queueDir, + allowStaleInflight: true, + staleInflightMs: 1, + }); + + expect(result).toEqual({ status: "flushed", rows: 1, batches: 1 }); + expect(api.query).toHaveBeenCalledTimes(1); + }); + + it("recovers stale inflight files after waiting on a busy session", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-wait-stale", 1), queueDir); + renameSync( + join(queueDir, "session-wait-stale.jsonl"), + join(queueDir, "session-wait-stale.inflight"), + ); + utimesSync(join(queueDir, "session-wait-stale.inflight"), 0, 0); + + const api = makeApi(); + const result = await flushSessionQueue(api, { + sessionId: "session-wait-stale", + sessionsTable: "sessions", + queueDir, + allowStaleInflight: true, + staleInflightMs: 1, + waitIfBusyMs: 1, + }); + + expect(result).toEqual({ status: "flushed", rows: 1, batches: 1 }); + expect(api.query).toHaveBeenCalledTimes(1); + }); + + it("ignores fresh inflight files during drain replay", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-fresh-inflight", 1), queueDir); + renameSync( + join(queueDir, "session-fresh-inflight.jsonl"), + join(queueDir, "session-fresh-inflight.inflight"), + ); + + const result = await drainSessionQueues(makeApi(), { + sessionsTable: "sessions", + queueDir, + staleInflightMs: 60_000, + }); + + expect(result).toEqual({ + queuedSessions: 0, + flushedSessions: 0, + rows: 0, + batches: 0, + }); + expect(existsSync(join(queueDir, "session-fresh-inflight.inflight"))).toBe(true); + }); + + it("removes expired and malformed disabled markers", () => { + const queueDir = makeQueueDir(); + markSessionWriteDisabled("sessions", "403 Forbidden", queueDir); + + 
expect(isSessionWriteDisabled("sessions", queueDir, 0)).toBe(false); + + const disabledPath = join(queueDir, ".sessions.disabled.json"); + writeFileSync(disabledPath, "{not-json"); + expect(isSessionWriteDisabled("sessions", queueDir)).toBe(false); + expect(existsSync(disabledPath)).toBe(false); + }); + + it("marks writes disabled when ensureSessionsTable fails with auth", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-ensure-auth", 1), queueDir); + + const api = makeApi(async () => { + throw new Error("table sessions does not exist"); + }); + api.ensureSessionsTable.mockRejectedValueOnce(new Error("403 Forbidden")); + + const result = await flushSessionQueue(api, { + sessionId: "session-ensure-auth", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "disabled", rows: 0, batches: 0 }); + expect(isSessionWriteDisabled("sessions", queueDir)).toBe(true); + }); + + it("marks writes disabled when the retry after ensure fails with auth", async () => { + const queueDir = makeQueueDir(); + appendQueuedSessionRow(makeRow("session-retry-auth", 1), queueDir); + + let attempts = 0; + const api = makeApi(async () => { + attempts += 1; + if (attempts === 1) throw new Error("table sessions does not exist"); + throw new Error("401 Unauthorized"); + }); + + const result = await flushSessionQueue(api, { + sessionId: "session-retry-auth", + sessionsTable: "sessions", + queueDir, + }); + + expect(result).toEqual({ status: "disabled", rows: 0, batches: 0 }); + expect(api.ensureSessionsTable).toHaveBeenCalledWith("sessions"); + expect(isSessionWriteDisabled("sessions", queueDir)).toBe(true); + }); + + it("treats string auth errors as auth failures and ignores unrelated errors", () => { + expect(isSessionWriteAuthError("401 Unauthorized")).toBe(true); + expect(isSessionWriteAuthError("something else")).toBe(false); + }); + + it("acquires, releases, and reclaims stale drain locks", () => { + const queueDir = 
makeQueueDir(); + + const release = tryAcquireSessionDrainLock("sessions", queueDir, 60_000); + expect(release).toBeTypeOf("function"); + expect(existsSync(join(queueDir, ".sessions.drain.lock"))).toBe(true); + + expect(tryAcquireSessionDrainLock("sessions", queueDir, 60_000)).toBeNull(); + + utimesSync(join(queueDir, ".sessions.drain.lock"), 0, 0); + const reclaimed = tryAcquireSessionDrainLock("sessions", queueDir, 1); + expect(reclaimed).toBeTypeOf("function"); + + reclaimed?.(); + expect(existsSync(join(queueDir, ".sessions.drain.lock"))).toBe(false); + release?.(); + }); +}); diff --git a/claude-code/tests/sessions-table.test.ts b/claude-code/tests/sessions-table.test.ts index 8c65aa8..40a254f 100644 --- a/claude-code/tests/sessions-table.test.ts +++ b/claude-code/tests/sessions-table.test.ts @@ -77,11 +77,11 @@ function makeClient(memoryRows: Row[] = [], sessionRows: Row[] = []) { // ── Tests ─────────────────────────────────────────────────────────────────── describe("DeeplakeFs — sessions table multi-row read", () => { - it("reads session file by concatenating rows ordered by creation_date", async () => { + it("reads session file by normalizing rows ordered by creation_date", async () => { const sessionRows: Row[] = [ { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"user_message","content":"hello"}', size_bytes: 40, mime_type: "application/json", creation_date: "2026-01-01T00:00:01Z" }, - { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"tool_call","tool_name":"Read"}', size_bytes: 38, mime_type: "application/json", creation_date: "2026-01-01T00:00:02Z" }, - { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"assistant_message","content":"done"}', size_bytes: 44, mime_type: "application/json", creation_date: "2026-01-01T00:00:03Z" }, + { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"assistant_message","content":"done"}', size_bytes: 44, 
mime_type: "application/json", creation_date: "2026-01-01T00:00:02Z" }, + { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"user_message","content":"bye"}', size_bytes: 42, mime_type: "application/json", creation_date: "2026-01-01T00:00:03Z" }, ]; const client = makeClient([], sessionRows); @@ -90,9 +90,9 @@ describe("DeeplakeFs — sessions table multi-row read", () => { const content = await fs.readFile("/sessions/alice/alice_org_default_s1.jsonl"); const lines = content.split("\n"); expect(lines).toHaveLength(3); - expect(JSON.parse(lines[0]).type).toBe("user_message"); - expect(JSON.parse(lines[1]).type).toBe("tool_call"); - expect(JSON.parse(lines[2]).type).toBe("assistant_message"); + expect(lines[0]).toBe("[user] hello"); + expect(lines[1]).toBe("[assistant] done"); + expect(lines[2]).toBe("[user] bye"); }); it("preserves creation_date ordering even if inserted out of order", async () => { @@ -121,9 +121,7 @@ describe("DeeplakeFs — sessions table multi-row read", () => { const fs = await DeeplakeFs.create(client as never, "memory", "/", "sessions"); const content = await fs.readFile("/sessions/u/s1.jsonl"); - const parsed = JSON.parse(content); - expect(parsed.type).toBe("user_message"); - expect(parsed.content).toBe("hi"); + expect(content).toBe("[user] hi"); }); it("lists session files in directory listing", async () => { diff --git a/claude-code/tests/shell-bundle-sql-trace-silence.test.ts b/claude-code/tests/shell-bundle-sql-trace-silence.test.ts new file mode 100644 index 0000000..2c55dd7 --- /dev/null +++ b/claude-code/tests/shell-bundle-sql-trace-silence.test.ts @@ -0,0 +1,86 @@ +/** + * Bundle-level regression guard for fix #3 — the shell bundle invoked by the + * pre-tool-use hook as `node shell-bundle -c "..."` must not leak + * `[deeplake-sql]` trace output onto stderr. 
Claude Code's Bash tool merges + * the child process's stderr into the tool_result string the model sees, so + * any trace line shows up as noise in Claude's view of the command output + * (observed in the original `baseline_cloud-100` transcripts, where 35+ + * lines of `[deeplake-sql]` noise polluted bash command results). + * + * The fix has two parts: + * 1. `traceSql` reads the HIVEMIND_TRACE_SQL / HIVEMIND_DEBUG env vars at + * call time (not at module load), so callers can turn tracing off after + * importing the SDK. + * 2. The shell bundle's one-shot entry point (`node ... -c "cmd"`) deletes + * those env vars before opening any SQL connection. + * + * This test spawns the shipped shell bundle with the trace vars set + * explicitly, runs a trivial command that's guaranteed not to touch the + * network (we point the SDK at an unreachable URL and expect the command to + * fail fast), and asserts that the combined stderr output contains zero + * `[deeplake-sql]` lines. If either fix is reverted, stderr fills with the + * trace messages and the test fails. + */ + +import { describe, expect, it } from "vitest"; +import { spawnSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const BUNDLE_PATH = join(__dirname, "..", "bundle", "shell", "deeplake-shell.js"); + +describe("shell bundle one-shot: SQL trace silence (fix #3)", () => { + it("does not write [deeplake-sql] to stderr even when trace env vars are set", () => { + if (!existsSync(BUNDLE_PATH)) { + throw new Error(`shell bundle missing at ${BUNDLE_PATH} — run 'npm run build' first`); + } + + // Drive the bundle through a path that DEFINITELY calls DeeplakeApi.query() + // (so traceSql fires). 
Fake creds are good enough — the API call will fail + // fast against an unreachable host, and if the trace silencer regresses, + // the first `[deeplake-sql] query start:` line hits stderr before the + // failure. Point at 127.0.0.1:1 (closed port) with a 200ms timeout so the + // test finishes in well under a second. + const cleanEnv: NodeJS.ProcessEnv = { + PATH: process.env.PATH, + HIVEMIND_TOKEN: "fake-token-for-trace-test", + HIVEMIND_ORG_ID: "fake-org", + HIVEMIND_WORKSPACE_ID: "fake-ws", + HIVEMIND_API_URL: "http://127.0.0.1:1", + HIVEMIND_QUERY_TIMEOUT_MS: "200", + // Pre-silenced env: our fix must keep these from leaking stderr. + HIVEMIND_TRACE_SQL: "1", + DEEPLAKE_TRACE_SQL: "1", + HIVEMIND_DEBUG: "1", + DEEPLAKE_DEBUG: "1", + }; + + const result = spawnSync(process.execPath, [BUNDLE_PATH, "-c", "echo hello"], { + env: cleanEnv, + encoding: "utf-8", + timeout: 15_000, + }); + + const combined = `${result.stdout ?? ""}\n${result.stderr ?? ""}`; + // With the one-shot silencer in place there must be zero SQL trace lines, + // even though the bundle issued SQL queries (that then failed against the + // unreachable host). If the fix regresses, expect lines like: + // "[deeplake-sql] query start: SELECT path, size_bytes ..." + expect(combined).not.toContain("[deeplake-sql]"); + }, 20_000); + + it("keeps interactive mode tracing available (env vars not deleted outside one-shot)", () => { + // Sanity check that the one-shot silencing is scoped: traceSql source + // still honours the env vars, so interactive usage (no -c) with + // HIVEMIND_TRACE_SQL=1 would still emit trace lines. We can't easily + // spawn the REPL here, so we just verify the condition in source — this + // guards against an over-eager fix that silences tracing globally. 
+ const { readFileSync } = require("node:fs"); + const apiSource = readFileSync(join(__dirname, "..", "..", "src", "deeplake-api.ts"), "utf-8"); + expect(apiSource).toMatch(/function traceSql\([^)]*\): void \{[\s\S]*process\.env\.HIVEMIND_TRACE_SQL/); + // Ensure the env read is inside the function (runtime), not a top-level const. + expect(apiSource).not.toMatch(/^const TRACE_SQL =/m); + }); +}); diff --git a/claude-code/tests/version-check.test.ts b/claude-code/tests/version-check.test.ts new file mode 100644 index 0000000..4d01aad --- /dev/null +++ b/claude-code/tests/version-check.test.ts @@ -0,0 +1,227 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { tmpdir } from "node:os"; +import { + getInstalledVersion, + getLatestVersionCached, + isNewer, + readFreshCachedLatestVersion, + readVersionCache, + writeVersionCache, +} from "../../src/hooks/version-check.js"; + +describe("version-check utilities", () => { + it("compares semantic versions", () => { + expect(isNewer("0.7.0", "0.6.37")).toBe(true); + expect(isNewer("0.6.37", "0.6.37")).toBe(false); + expect(isNewer("0.6.36", "0.6.37")).toBe(false); + }); +}); + +describe("getInstalledVersion", () => { + let root: string; + + beforeEach(() => { + root = join(tmpdir(), `hivemind-version-${Date.now()}-${Math.random().toString(36).slice(2)}`); + mkdirSync(root, { recursive: true }); + }); + + afterEach(() => { + rmSync(root, { recursive: true, force: true }); + }); + + it("prefers plugin manifest when present", () => { + const bundleDir = join(root, "claude-code", "bundle"); + mkdirSync(join(root, "claude-code", ".claude-plugin"), { recursive: true }); + mkdirSync(bundleDir, { recursive: true }); + writeFileSync(join(root, "claude-code", ".claude-plugin", "plugin.json"), JSON.stringify({ version: "0.6.37" })); + writeFileSync(join(root, "package.json"), JSON.stringify({ name: 
"hivemind", version: "0.1.0" })); + + expect(getInstalledVersion(bundleDir, ".claude-plugin")).toBe("0.6.37"); + }); + + it("falls back to package.json when plugin manifest has no version", () => { + const bundleDir = join(root, "claude-code", "bundle"); + mkdirSync(join(root, "claude-code", ".claude-plugin"), { recursive: true }); + mkdirSync(bundleDir, { recursive: true }); + writeFileSync(join(root, "claude-code", ".claude-plugin", "plugin.json"), JSON.stringify({ name: "hivemind" })); + writeFileSync(join(root, "package.json"), JSON.stringify({ name: "hivemind", version: "0.6.41" })); + + expect(getInstalledVersion(bundleDir, ".claude-plugin")).toBe("0.6.41"); + }); + + it("walks up to package.json when plugin manifest is absent", () => { + const bundleDir = join(root, "codex", "bundle"); + mkdirSync(bundleDir, { recursive: true }); + writeFileSync(join(root, "package.json"), JSON.stringify({ name: "hivemind-codex", version: "0.6.40" })); + + expect(getInstalledVersion(bundleDir, ".codex-plugin")).toBe("0.6.40"); + }); + + it("returns null when neither plugin.json nor a matching package.json exists", () => { + const bundleDir = join(root, "bundle"); + mkdirSync(bundleDir, { recursive: true }); + writeFileSync(join(root, "package.json"), JSON.stringify({ name: "other-package", version: "1.0.0" })); + + expect(getInstalledVersion(bundleDir, ".claude-plugin")).toBeNull(); + }); + + it("returns null when the plugin manifest is invalid json and no package matches", () => { + const bundleDir = join(root, "claude-code", "bundle"); + mkdirSync(join(root, "claude-code", ".claude-plugin"), { recursive: true }); + mkdirSync(bundleDir, { recursive: true }); + writeFileSync(join(root, "claude-code", ".claude-plugin", "plugin.json"), "{bad-json"); + + expect(getInstalledVersion(bundleDir, ".claude-plugin")).toBeNull(); + }); +}); + +describe("version cache", () => { + let cachePath: string; + + beforeEach(() => { + cachePath = join(tmpdir(), 
`hivemind-cache-${Date.now()}-${Math.random().toString(36).slice(2)}`, "version.json"); + mkdirSync(dirname(cachePath), { recursive: true }); + }); + + afterEach(() => { + rmSync(dirname(cachePath), { recursive: true, force: true }); + vi.restoreAllMocks(); + }); + + it("reads and writes cache entries", () => { + writeVersionCache({ checkedAt: 123, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + expect(readVersionCache(cachePath)).toEqual({ + checkedAt: 123, + latest: "0.6.38", + url: "https://example.com/pkg.json", + }); + }); + + it("returns fresh cached version within ttl", () => { + writeVersionCache({ checkedAt: 1_000, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + expect(readFreshCachedLatestVersion("https://example.com/pkg.json", 500, cachePath, 1_400)).toBe("0.6.38"); + expect(readFreshCachedLatestVersion("https://example.com/pkg.json", 500, cachePath, 1_500)).toBe("0.6.38"); + expect(readFreshCachedLatestVersion("https://example.com/pkg.json", 500, cachePath, 1_600)).toBeUndefined(); + }); + + it("returns null for invalid cache files and url mismatches", () => { + writeFileSync(cachePath, JSON.stringify({ checkedAt: "bad", latest: 42, url: 123 })); + expect(readVersionCache(cachePath)).toBeNull(); + expect(readFreshCachedLatestVersion("https://other.example.com/pkg.json", 500, cachePath, 1_200)).toBeUndefined(); + }); + + it("uses cached value without fetching when cache is fresh", async () => { + writeVersionCache({ checkedAt: 1_000, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + const fetchImpl = vi.fn(); + + const latest = await getLatestVersionCached({ + url: "https://example.com/pkg.json", + timeoutMs: 3000, + ttlMs: 500, + cachePath, + nowMs: 1_400, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBe("0.6.38"); + expect(fetchImpl).not.toHaveBeenCalled(); + }); + + it("fetches and caches when cache is stale", async () => { + writeVersionCache({ 
checkedAt: 1_000, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + const fetchImpl = vi.fn(async () => ({ + ok: true, + json: async () => ({ version: "0.6.40" }), + })); + + const latest = await getLatestVersionCached({ + url: "https://example.com/pkg.json", + timeoutMs: 3000, + ttlMs: 100, + cachePath, + nowMs: 2_000, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBe("0.6.40"); + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(readVersionCache(cachePath)?.latest).toBe("0.6.40"); + }); + + it("writes null when a successful fetch returns no version field", async () => { + const fetchImpl = vi.fn(async () => ({ + ok: true, + json: async () => ({ name: "hivemind" }), + })); + + const latest = await getLatestVersionCached({ + url: "https://example.com/pkg.json", + timeoutMs: 3000, + cachePath, + nowMs: 2_000, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBeNull(); + expect(readVersionCache(cachePath)?.latest).toBeNull(); + }); + + it("falls back to stale cached value on non-ok fetch responses", async () => { + writeVersionCache({ checkedAt: 1_000, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + const fetchImpl = vi.fn(async () => ({ + ok: false, + json: async () => ({ version: "0.6.40" }), + })); + + const latest = await getLatestVersionCached({ + url: "https://example.com/pkg.json", + timeoutMs: 3000, + ttlMs: 100, + cachePath, + nowMs: 2_000, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBe("0.6.38"); + expect(readVersionCache(cachePath)?.latest).toBe("0.6.38"); + }); + + it("reuses stale cached value on fetch failure and refreshes checkedAt", async () => { + writeVersionCache({ checkedAt: 1_000, latest: "0.6.38", url: "https://example.com/pkg.json" }, cachePath); + const fetchImpl = vi.fn(async () => { throw new Error("network down"); }); + + const latest = await getLatestVersionCached({ + url: 
"https://example.com/pkg.json", + timeoutMs: 3000, + ttlMs: 100, + cachePath, + nowMs: 2_000, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBe("0.6.38"); + expect(readVersionCache(cachePath)?.checkedAt).toBe(2_000); + }); + + it("returns null and still writes cache state when fetch fails without stale cache", async () => { + const fetchImpl = vi.fn(async () => { throw new Error("network down"); }); + + const latest = await getLatestVersionCached({ + url: "https://example.com/pkg.json", + timeoutMs: 3000, + ttlMs: 100, + cachePath, + nowMs: 2_000, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(latest).toBeNull(); + expect(readVersionCache(cachePath)).toEqual({ + checkedAt: 2_000, + latest: null, + url: "https://example.com/pkg.json", + }); + }); +}); diff --git a/claude-code/tests/virtual-table-query.test.ts b/claude-code/tests/virtual-table-query.test.ts new file mode 100644 index 0000000..013c6c0 --- /dev/null +++ b/claude-code/tests/virtual-table-query.test.ts @@ -0,0 +1,359 @@ +import { describe, expect, it, vi } from "vitest"; +import { + buildVirtualIndexContent, + findVirtualPaths, + listVirtualPathRowsForDirs, + listVirtualPathRows, + readVirtualPathContents, + readVirtualPathContent, +} from "../../src/hooks/virtual-table-query.js"; + +describe("virtual-table-query", () => { + it("builds a synthetic virtual index", () => { + const content = buildVirtualIndexContent([ + { + path: "/summaries/alice/s1.md", + project: "repo", + description: "session summary", + creation_date: "2026-01-01T00:00:00.000Z", + }, + ]); + expect(content).toContain("# Memory Index"); + expect(content).toContain("/summaries/alice/s1.md"); + }); + + it("builds index rows when project metadata is missing", () => { + const content = buildVirtualIndexContent([ + { + path: "/summaries/alice/s2.md", + }, + ]); + expect(content).toContain("/summaries/alice/s2.md"); + expect(content).toContain("# Memory Index"); + }); + + it("prefers a 
memory-table hit for exact path reads", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", content: "summary body", source_order: 0 }, + ]), + } as any; + + const content = await readVirtualPathContent(api, "memory", "sessions", "/summaries/a.md"); + + expect(content).toBe("summary body"); + expect(api.query).toHaveBeenCalledTimes(1); + }); + + it("returns an empty map when no virtual paths are requested", async () => { + const api = { query: vi.fn() } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", []); + + expect(content).toEqual(new Map()); + expect(api.query).not.toHaveBeenCalled(); + }); + + it("normalizes session rows for exact path reads", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/sessions/a.jsonl", content: "{\"type\":\"user_message\",\"content\":\"hello\"}", source_order: 1 }, + { path: "/sessions/a.jsonl", content: "{\"type\":\"assistant_message\",\"content\":\"hi\"}", source_order: 1 }, + ]), + } as any; + + const content = await readVirtualPathContent(api, "memory", "sessions", "/sessions/a.jsonl"); + + expect(content).toBe("[user] hello\n[assistant] hi"); + }); + + it("reads multiple exact paths in a single query and synthesizes /index.md when needed", async () => { + const api = { + query: vi.fn() + .mockResolvedValueOnce([ + { path: "/summaries/a.md", content: "summary body", source_order: 0 }, + ]) + .mockResolvedValueOnce([ + { + path: "/summaries/alice/s1.md", + project: "repo", + description: "session summary", + creation_date: "2026-01-01T00:00:00.000Z", + }, + ]) + .mockResolvedValueOnce([]), + } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", ["/summaries/a.md", "/index.md"]); + + expect(content.get("/summaries/a.md")).toBe("summary body"); + expect(content.get("/index.md")).toContain("# Memory Index"); + // 1 union query for exact paths + 2 parallel fallback queries (summaries + 
sessions) for /index.md + expect(api.query).toHaveBeenCalledTimes(3); + }); + + it("ignores invalid exact-read rows before merging content", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: 42, content: "bad", source_order: 0 }, + { path: "/summaries/a.md", content: 7, source_order: 0 }, + { path: "/summaries/a.md", content: "summary body", source_order: 0 }, + ]), + } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", ["/summaries/a.md"]); + + expect(content.get("/summaries/a.md")).toBe("summary body"); + }); + + it("merges and de-duplicates rows for directory listings", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", size_bytes: 10, source_order: 0 }, + { path: "/shared.md", size_bytes: 11, source_order: 0 }, + { path: "/sessions/a.jsonl", size_bytes: 12, source_order: 1 }, + { path: "/shared.md", size_bytes: 13, source_order: 1 }, + ]), + } as any; + + const rows = await listVirtualPathRows(api, "memory", "sessions", "/"); + + expect(rows).toEqual([ + { path: "/summaries/a.md", size_bytes: 10 }, + { path: "/shared.md", size_bytes: 11 }, + { path: "/sessions/a.jsonl", size_bytes: 12 }, + ]); + }); + + it("batches directory listing rows for multiple directories", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a/file1.md", size_bytes: 10, source_order: 0 }, + { path: "/summaries/b/file2.md", size_bytes: 20, source_order: 0 }, + ]), + } as any; + + const rows = await listVirtualPathRowsForDirs(api, "memory", "sessions", ["/summaries/a", "/summaries/b"]); + + expect(rows.get("/summaries/a")).toEqual([{ path: "/summaries/a/file1.md", size_bytes: 10 }]); + expect(rows.get("/summaries/b")).toEqual([{ path: "/summaries/b/file2.md", size_bytes: 20 }]); + expect(api.query).toHaveBeenCalledTimes(1); + }); + + it("lists root directories without adding a path filter and ignores invalid row paths", async () => { + 
const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a/file1.md", size_bytes: 10, source_order: 0 }, + { path: 42, size_bytes: 20, source_order: 0 }, + ]), + } as any; + + const rows = await listVirtualPathRowsForDirs(api, "memory", "sessions", ["/"]); + + expect(rows.get("/")).toEqual([{ path: "/summaries/a/file1.md", size_bytes: 10 }]); + expect((api.query.mock.calls[0]?.[0] as string) ?? "").not.toContain("WHERE path LIKE"); + }); + + it("merges and de-duplicates path search results", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", source_order: 0 }, + { path: "/shared.md", source_order: 0 }, + { path: "/sessions/a.jsonl", source_order: 1 }, + { path: "/shared.md", source_order: 1 }, + ]), + } as any; + + const paths = await findVirtualPaths(api, "memory", "sessions", "/", "%.md"); + + expect(paths).toEqual(["/summaries/a.md", "/shared.md", "/sessions/a.jsonl"]); + }); + + it("falls back to per-table queries when the union query fails", async () => { + const api = { + query: vi.fn() + .mockRejectedValueOnce(new Error("bad union")) + .mockResolvedValueOnce([{ path: "/summaries/a.md", content: "summary body", source_order: 0 }]) + .mockResolvedValueOnce([]), + } as any; + + const content = await readVirtualPathContent(api, "memory", "sessions", "/summaries/a.md"); + + expect(content).toBe("summary body"); + expect(api.query).toHaveBeenCalledTimes(3); + }); + + it("returns null when union and fallback queries all fail", async () => { + const api = { + query: vi.fn() + .mockRejectedValueOnce(new Error("bad union")) + .mockRejectedValueOnce(new Error("memory down")) + .mockRejectedValueOnce(new Error("sessions down")), + } as any; + + const content = await readVirtualPathContent(api, "memory", "sessions", "/summaries/a.md"); + + expect(content).toBeNull(); + expect(api.query).toHaveBeenCalledTimes(3); + }); + + it("filters invalid paths from find results", async () => { + const api = { + 
query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", source_order: 0 }, + { path: "", source_order: 0 }, + { path: 123, source_order: 1 }, + ]), + } as any; + + const paths = await findVirtualPaths(api, "memory", "sessions", "/", "%.md"); + + expect(paths).toEqual(["/summaries/a.md"]); + }); + + it("normalizes non-root find directories before building the LIKE path", async () => { + const api = { + query: vi.fn().mockResolvedValueOnce([]), + } as any; + + await findVirtualPaths(api, "memory", "sessions", "/summaries/a///", "%.md"); + + expect(String(api.query.mock.calls[0]?.[0])).toContain("path LIKE '/summaries/a/%'"); + }); + + // ── Regression coverage: /index.md must list session files too ─────────── + // + // Bug: in workspaces where the `memory` table is empty or dropped (e.g. the + // sessions-only `locomo_benchmark/baseline` workspace), the synthesized + // /index.md used to report "0 sessions:" and list nothing, even when the + // `sessions` table held hundreds of rows. Agents reading that index + // concluded memory was empty and gave up on retrieval. 
+ + describe("buildVirtualIndexContent: sessions + summaries", () => { + it("renders both sections with a combined header when both tables have rows", () => { + const content = buildVirtualIndexContent( + [ + { + path: "/summaries/alice/s1.md", + project: "repo", + description: "summary one", + creation_date: "2026-01-01T00:00:00.000Z", + }, + ], + [ + { path: "/sessions/conv_0_session_1.json", description: "session one" }, + { path: "/sessions/conv_0_session_2.json", description: "session two" }, + ], + ); + + expect(content).toContain("3 entries (1 summaries, 2 sessions):"); + expect(content).toContain("## Summaries"); + expect(content).toContain("## Sessions"); + expect(content).toContain("/summaries/alice/s1.md"); + expect(content).toContain("/sessions/conv_0_session_1.json"); + expect(content).toContain("/sessions/conv_0_session_2.json"); + // Summaries section comes before Sessions section + expect(content.indexOf("## Summaries")).toBeLessThan(content.indexOf("## Sessions")); + }); + + it("renders only sessions when the memory table is empty (the baseline_cloud regression)", () => { + const content = buildVirtualIndexContent( + [], + [ + { path: "/sessions/conv_0_session_1.json", description: "first" }, + { path: "/sessions/conv_0_session_2.json", description: "second" }, + ], + ); + + expect(content).toContain("2 entries (0 summaries, 2 sessions):"); + expect(content).toContain("## Sessions"); + expect(content).not.toContain("## Summaries"); + expect(content).toContain("/sessions/conv_0_session_1.json"); + // Guard against the old bug: must not report "0 sessions:" as the total. 
+ expect(content).not.toMatch(/\n0 sessions:/); + }); + + it("stays backwards-compatible when called with only summary rows", () => { + const content = buildVirtualIndexContent([ + { + path: "/summaries/alice/s1.md", + project: "repo", + description: "summary only", + creation_date: "2026-01-01T00:00:00.000Z", + }, + ]); + + expect(content).toContain("1 entries (1 summaries, 0 sessions):"); + expect(content).toContain("/summaries/alice/s1.md"); + expect(content).not.toContain("## Sessions"); + }); + + it("produces a well-formed empty index when both tables are empty", () => { + const content = buildVirtualIndexContent([], []); + expect(content).toContain("# Memory Index"); + expect(content).toContain("0 entries (0 summaries, 0 sessions):"); + expect(content).not.toContain("## Summaries"); + expect(content).not.toContain("## Sessions"); + }); + }); + + describe("readVirtualPathContents: /index.md fallback queries both tables", () => { + it("queries both memory and sessions tables in parallel when /index.md has no physical row", async () => { + const api = { + query: vi.fn() + // 1. Union query for the exact-path read (no /index.md row present) + .mockResolvedValueOnce([]) + // 2. Parallel fallback: summaries from memory (empty — baseline_cloud case) + .mockResolvedValueOnce([]) + // 3. Parallel fallback: sessions table (272 rows) + .mockResolvedValueOnce([ + { path: "/sessions/conv_0_session_1.json", description: "conv 0 sess 1" }, + { path: "/sessions/conv_0_session_2.json", description: "conv 0 sess 2" }, + ]), + } as any; + + const result = await readVirtualPathContents(api, "memory", "sessions", ["/index.md"]); + const indexContent = result.get("/index.md") ?? ""; + + expect(api.query).toHaveBeenCalledTimes(3); + + const fallbackSqls = [ + String(api.query.mock.calls[1]?.[0] ?? ""), + String(api.query.mock.calls[2]?.[0] ?? ""), + ]; + const summarySql = fallbackSqls.find(sql => sql.includes("/summaries/%")) ?? 
""; + const sessionsSql = fallbackSqls.find(sql => sql.includes("/sessions/%")) ?? ""; + + expect(summarySql).toContain('FROM "memory"'); + expect(summarySql).toContain("path LIKE '/summaries/%'"); + expect(sessionsSql).toContain('FROM "sessions"'); + expect(sessionsSql).toContain("path LIKE '/sessions/%'"); + + expect(indexContent).toContain("2 entries (0 summaries, 2 sessions):"); + expect(indexContent).toContain("/sessions/conv_0_session_1.json"); + expect(indexContent).toContain("/sessions/conv_0_session_2.json"); + }); + + it("still produces an index when the sessions-table fallback query fails", async () => { + const api = { + query: vi.fn() + .mockResolvedValueOnce([]) // union query for exact paths + .mockResolvedValueOnce([ + { + path: "/summaries/alice/s1.md", + project: "repo", + description: "summary", + creation_date: "2026-01-01T00:00:00.000Z", + }, + ]) + .mockRejectedValueOnce(new Error("sessions table down")), + } as any; + + const result = await readVirtualPathContents(api, "memory", "sessions", ["/index.md"]); + const indexContent = result.get("/index.md") ?? ""; + + expect(indexContent).toContain("1 entries (1 summaries, 0 sessions):"); + expect(indexContent).toContain("/summaries/alice/s1.md"); + }); + }); +}); diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index b449e10..67b7919 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -55,6 +55,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -79,27 +82,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? 
process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -252,8 +284,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join3(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync2(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -264,22 +349,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -289,6 +377,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -298,7 +388,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -309,37 +402,37 @@ function buildSessionPath(config, sessionId) { } // dist/src/hooks/summary-state.js -import { readFileSync as readFileSync2, writeFileSync, writeSync, mkdirSync, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; +import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync2, renameSync, existsSync as existsSync3, unlinkSync, openSync, closeSync } from "node:fs"; import { homedir as homedir3 } from "node:os"; -import { join as join3 } from "node:path"; +import { join as join4 } from "node:path"; var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join3(homedir3(), ".claude", "hooks", "summary-state"); +var STATE_DIR = join4(homedir3(), ".claude", "hooks", "summary-state"); var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); function statePath(sessionId) { - return join3(STATE_DIR, `${sessionId}.json`); + return join4(STATE_DIR, `${sessionId}.json`); } function lockPath(sessionId) { - return join3(STATE_DIR, `${sessionId}.lock`); + return join4(STATE_DIR, `${sessionId}.lock`); } function readState(sessionId) { const p = statePath(sessionId); - if (!existsSync2(p)) + if 
(!existsSync3(p)) return null; try { - return JSON.parse(readFileSync2(p, "utf-8")); + return JSON.parse(readFileSync3(p, "utf-8")); } catch { return null; } } function writeState(sessionId, state) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const p = statePath(sessionId); const tmp = `${p}.${process.pid}.${Date.now()}.tmp`; - writeFileSync(tmp, JSON.stringify(state)); + writeFileSync2(tmp, JSON.stringify(state)); renameSync(tmp, p); } function withRmwLock(sessionId, fn) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const rmwLock = statePath(sessionId) + ".rmw"; const deadline = Date.now() + 2e3; let fd = null; @@ -401,11 +494,11 @@ function shouldTrigger(state, cfg, now = Date.now()) { return false; } function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync(STATE_DIR, { recursive: true }); + mkdirSync2(STATE_DIR, { recursive: true }); const p = lockPath(sessionId); - if (existsSync2(p)) { + if (existsSync3(p)) { try { - const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10); + const ageMs = Date.now() - parseInt(readFileSync3(p, "utf-8"), 10); if (Number.isFinite(ageMs) && ageMs < maxAgeMs) return false; } catch (readErr) { @@ -445,20 +538,20 @@ function releaseLock(sessionId) { // dist/src/hooks/codex/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join5 } from "node:path"; -import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs"; -import { homedir as homedir4, tmpdir } from "node:os"; +import { dirname, join as join6 } from "node:path"; +import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync4 } from "node:fs"; +import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; // dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as 
join4 } from "node:path"; +import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join5 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join4(hooksDir, filename); + const path = join5(hooksDir, filename); return { path, log(msg) { try { - mkdirSync2(hooksDir, { recursive: true }); + mkdirSync3(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -469,7 +562,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir4(); -var wikiLogger = makeWikiLogger(join5(HOME, ".codex", "hooks")); +var wikiLogger = makeWikiLogger(join6(HOME, ".codex", "hooks")); var WIKI_LOG = wikiLogger.path; var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. 
@@ -531,10 +624,10 @@ function findCodexBin() { function spawnCodexWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join5(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync3(tmpDir, { recursive: true }); - const configFile = join5(tmpDir, "config.json"); - writeFileSync2(configFile, JSON.stringify({ + const tmpDir = join6(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync4(tmpDir, { recursive: true }); + const configFile = join6(tmpDir, "config.json"); + writeFileSync3(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, @@ -547,11 +640,11 @@ function spawnCodexWikiWorker(opts) { tmpDir, codexBin: findCodexBin(), wikiLog: WIKI_LOG, - hooksDir: join5(HOME, ".codex", "hooks"), + hooksDir: join6(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join5(bundleDir, "wiki-worker.js"); + const workerPath = join6(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index 6d4cb13..064f11e 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -239,6 +239,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -260,27 +263,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? 
process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -433,8 +465,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join4(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync2(getIndexMarkerDir(), { recursive: true }); + writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -445,22 +530,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -470,6 +558,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -479,7 +569,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 4f3873b..28cf31d 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -1,22 +1,20 @@ #!/usr/bin/env node // dist/src/hooks/codex/pre-tool-use.js -import { existsSync as existsSync2 } from "node:fs"; import { execFileSync } from "node:child_process"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; -import { fileURLToPath } from "node:url"; -import { dirname } from "node:path"; +import { existsSync as existsSync3 } from "node:fs"; +import { join as join6, dirname } from "node:path"; +import { fileURLToPath as fileURLToPath2 } from "node:url"; // dist/src/utils/stdin.js function readStdin() { - return new Promise((resolve, reject) => { + return new Promise((resolve2, reject) => { let data = ""; process.stdin.setEncoding("utf-8"); process.stdin.on("data", (chunk) => data += chunk); process.stdin.on("end", () => { try { - resolve(JSON.parse(data)); + resolve2(JSON.parse(data)); } catch (err) { reject(new Error(`Failed to parse hook input: ${err}`)); } @@ -63,6 +61,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from 
"node:crypto"; +import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -87,26 +88,47 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve2) => setTimeout(resolve2, ms)); +} +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" this.waiting.push(resolve)); + await new Promise((resolve2) => this.waiting.push(resolve2)); } release() { this.active--; @@ -139,6 +161,7 @@ var DeeplakeApi = class { tableName; _pendingRows = []; _sem = new Semaphore(MAX_CONCURRENCY); + _tablesCache = null; constructor(token, apiUrl, orgId, workspaceId, tableName) { this.token = token; this.apiUrl = apiUrl; @@ -169,6 +192,7 @@ var DeeplakeApi = class { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { let resp; try { + const signal = AbortSignal.timeout(QUERY_TIMEOUT_MS); resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { method: "POST", headers: { @@ -176,9 +200,14 @@ var DeeplakeApi = class { "Content-Type": "application/json", "X-Activeloop-Org-Id": this.orgId }, + signal, body: JSON.stringify({ query: sql }) }); } catch (e) { + if (isTimeoutError(e)) { + lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + throw lastError; + } lastError = e instanceof Error ? 
e : new Error(String(e)); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; @@ -195,7 +224,8 @@ var DeeplakeApi = class { return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -260,8 +290,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join3(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync2(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -272,22 +355,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? 
[]).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -297,6 +383,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -306,7 +394,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -519,29 +610,127 @@ function normalizeContent(path, raw) { return raw; return out; } +function buildPathCondition(targetPath) { + if (!targetPath || targetPath === "/") + return ""; + const clean = targetPath.replace(/\/+$/, ""); + if (/[*?]/.test(clean)) { + const likePattern = sqlLike(clean).replace(/\*/g, "%").replace(/\?/g, "_"); + return `path LIKE '${likePattern}' ESCAPE '\\'`; + } + const base = clean.split("/").pop() ?? ""; + if (base.includes(".")) { + return `path = '${sqlStr(clean)}'`; + } + return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; +} async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; const limit = opts.limit ?? 100; - const memFilter = contentScanOnly ? "" : ` AND summary::text ${likeOp} '%${escapedPattern}%'`; - const sessFilter = contentScanOnly ? 
"" : ` AND message::text ${likeOp} '%${escapedPattern}%'`; - const memQuery = `SELECT path, summary::text AS content FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const [memRows, sessRows] = await Promise.all([ - api.query(memQuery).catch(() => []), - api.query(sessQuery).catch(() => []) - ]); - const rows = []; - for (const r of memRows) - rows.push({ path: String(r.path), content: String(r.content ?? "") }); - for (const r of sessRows) - rows.push({ path: String(r.path), content: String(r.content ?? "") }); - return rows; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern]; + const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + return rows.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); } function buildPathFilter(targetPath) { - if (!targetPath || targetPath === "/") + const condition = buildPathCondition(targetPath); + return condition ? 
` AND ${condition}` : ""; +} +function extractRegexLiteralPrefilter(pattern) { + if (!pattern) + return null; + const parts = []; + let current = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return null; + if (/[dDsSwWbBAZzGkKpP]/.test(next)) + return null; + current += next; + i++; + continue; + } + if (ch === ".") { + if (pattern[i + 1] === "*") { + if (current) + parts.push(current); + current = ""; + i++; + continue; + } + return null; + } + if ("|()[]{}+?^$".includes(ch) || ch === "*") + return null; + current += ch; + } + if (current) + parts.push(current); + const literal = parts.reduce((best, part) => part.length > best.length ? part : best, ""); + return literal.length >= 2 ? literal : null; +} +function extractRegexAlternationPrefilters(pattern) { + if (!pattern.includes("|")) + return null; + const parts = []; + let current = ""; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + current += `\\${ch}`; + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "|") { + if (!current) + return null; + parts.push(current); + current = ""; + continue; + } + if ("()[]{}^$".includes(ch)) + return null; + current += ch; + } + if (escaped || !current) + return null; + parts.push(current); + const literals = [...new Set(parts.map((part) => extractRegexLiteralPrefilter(part)).filter((part) => typeof part === "string" && part.length >= 2))]; + return literals.length > 0 ? literals : null; +} +function buildGrepSearchOptions(params, targetPath) { + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; + const alternationPrefilters = hasRegexMeta ? 
extractRegexAlternationPrefilters(params.pattern) : null; + return { + pathFilter: buildPathFilter(targetPath), + contentScanOnly: hasRegexMeta, + likeOp: params.ignoreCase ? "ILIKE" : "LIKE", + escapedPattern: sqlLike(params.pattern), + prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + }; +} +function buildContentFilter(column, likeOp, patterns) { + if (patterns.length === 0) return ""; - const clean = targetPath.replace(/\/+$/, ""); - return ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; + if (patterns.length === 1) + return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; + return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; } function compileGrepRegex(params) { let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; @@ -585,81 +774,205 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, { - pathFilter: buildPathFilter(targetPath), - contentScanOnly: hasRegexMeta, - likeOp: params.ignoreCase ? "ILIKE" : "LIKE", - escapedPattern: sqlLike(params.pattern) - }); + const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? 
false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); return refineGrepMatches(normalized, params); } +// dist/src/utils/output-cap.js +var CLAUDE_OUTPUT_CAP_BYTES = 8 * 1024; +function byteLen(str) { + return Buffer.byteLength(str, "utf8"); +} +function capOutputForClaude(output, options = {}) { + const maxBytes = options.maxBytes ?? CLAUDE_OUTPUT_CAP_BYTES; + if (byteLen(output) <= maxBytes) + return output; + const kind = options.kind ?? "output"; + const footerReserve = 220; + const budget = Math.max(1, maxBytes - footerReserve); + let running = 0; + const lines = output.split("\n"); + const keptLines = []; + for (const line of lines) { + const lineBytes = byteLen(line) + 1; + if (running + lineBytes > budget) + break; + keptLines.push(line); + running += lineBytes; + } + if (keptLines.length === 0) { + const buf = Buffer.from(output, "utf8"); + let cutByte = Math.min(budget, buf.length); + while (cutByte > 0 && (buf[cutByte] & 192) === 128) + cutByte--; + const slice = buf.subarray(0, cutByte).toString("utf8"); + const footer2 = ` +... [${kind} truncated: ${(byteLen(output) / 1024).toFixed(1)} KB total; refine with '| head -N' or a tighter pattern]`; + return slice + footer2; + } + const totalLines = lines.length - (lines[lines.length - 1] === "" ? 1 : 0); + const elidedLines = Math.max(0, totalLines - keptLines.length); + const elidedBytes = byteLen(output) - byteLen(keptLines.join("\n")); + const footer = ` +... 
[${kind} truncated: ${elidedLines} more lines (${(elidedBytes / 1024).toFixed(1)} KB) elided \u2014 refine with '| head -N' or a tighter pattern]`; + return keptLines.join("\n") + footer; +} + // dist/src/hooks/grep-direct.js -function parseBashGrep(cmd) { - const first = cmd.trim().split(/\s*\|\s*/)[0]; - if (!/^(grep|egrep|fgrep)\b/.test(first)) - return null; - const isFixed = first.startsWith("fgrep"); +function splitFirstPipelineStage(cmd) { + const input = cmd.trim(); + let quote = null; + let escaped = false; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (escaped) { + escaped = false; + continue; + } + if (quote) { + if (ch === quote) { + quote = null; + continue; + } + if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + continue; + } + if (ch === "|") + return input.slice(0, i).trim(); + } + return quote ? null : input; +} +function tokenizeGrepStage(input) { const tokens = []; - let pos = 0; - while (pos < first.length) { - if (first[pos] === " " || first[pos] === " ") { - pos++; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"' && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } continue; } - if (first[pos] === "'" || first[pos] === '"') { - const q = first[pos]; - let end = pos + 1; - while (end < first.length && first[end] !== q) - end++; - tokens.push(first.slice(pos + 1, end)); - pos = end + 1; - } else { - let end = pos; - while (end < first.length && first[end] !== " " && first[end] !== " ") - end++; - tokens.push(first.slice(pos, end)); - pos = end; + if (ch === "'" || ch === '"') { + quote = ch; + continue; + } + if (ch === "\\" && i + 1 < input.length) { + current += input[++i]; + continue; + } + if (/\s/.test(ch)) { 
+ if (current) { + tokens.push(current); + current = ""; + } + continue; } + current += ch; } + if (quote) + return null; + if (current) + tokens.push(current); + return tokens; +} +function parseBashGrep(cmd) { + const first = splitFirstPipelineStage(cmd); + if (!first) + return null; + if (!/^(grep|egrep|fgrep)\b/.test(first)) + return null; + const isFixed = first.startsWith("fgrep"); + const tokens = tokenizeGrepStage(first); + if (!tokens || tokens.length === 0) + return null; let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + const explicitPatterns = []; let ti = 1; - while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { - const flag = tokens[ti]; - if (flag.startsWith("--")) { + while (ti < tokens.length) { + const token = tokens[ti]; + if (token === "--") { + ti++; + break; + } + if (!token.startsWith("-") || token === "-") + break; + if (token.startsWith("--")) { + const [flag, inlineValue] = token.split("=", 2); const handlers = { "--ignore-case": () => { ignoreCase = true; + return false; }, "--word-regexp": () => { wordMatch = true; + return false; }, "--files-with-matches": () => { filesOnly = true; + return false; }, "--count": () => { countOnly = true; + return false; }, "--line-number": () => { lineNumber = true; + return false; }, "--invert-match": () => { invertMatch = true; + return false; }, "--fixed-strings": () => { fixedString = true; + return false; + }, + "--after-context": () => inlineValue === void 0, + "--before-context": () => inlineValue === void 0, + "--context": () => inlineValue === void 0, + "--max-count": () => inlineValue === void 0, + "--regexp": () => { + if (inlineValue !== void 0) { + explicitPatterns.push(inlineValue); + return false; + } + return true; } }; - handlers[flag]?.(); + const consumeNext = handlers[flag]?.() ?? 
false; + if (consumeNext) { + ti++; + if (ti >= tokens.length) + return null; + if (flag === "--regexp") + explicitPatterns.push(tokens[ti]); + } ti++; continue; } - for (const c of flag.slice(1)) { - switch (c) { + const shortFlags = token.slice(1); + for (let i = 0; i < shortFlags.length; i++) { + const flag = shortFlags[i]; + switch (flag) { case "i": ignoreCase = true; break; @@ -681,19 +994,48 @@ function parseBashGrep(cmd) { case "F": fixedString = true; break; + case "r": + case "R": + case "E": + break; + case "A": + case "B": + case "C": + case "m": + if (i === shortFlags.length - 1) { + ti++; + if (ti >= tokens.length) + return null; + } + i = shortFlags.length; + break; + case "e": { + const inlineValue = shortFlags.slice(i + 1); + if (inlineValue) { + explicitPatterns.push(inlineValue); + } else { + ti++; + if (ti >= tokens.length) + return null; + explicitPatterns.push(tokens[ti]); + } + i = shortFlags.length; + break; + } + default: + break; } } ti++; } - if (ti < tokens.length && tokens[ti] === "--") - ti++; - if (ti >= tokens.length) + const pattern = explicitPatterns.length > 0 ? explicitPatterns[0] : tokens[ti]; + if (!pattern) return null; - let target = tokens[ti + 1] ?? "/"; + let target = explicitPatterns.length > 0 ? tokens[ti] ?? "/" : tokens[ti + 1] ?? "/"; if (target === "." 
|| target === "./") target = "/"; return { - pattern: tokens[ti], + pattern, targetPath: target, ignoreCase, wordMatch, @@ -718,16 +1060,678 @@ async function handleGrepDirect(api, table, sessionsTable, params) { fixedString: params.fixedString }; const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); - return output.join("\n") || "(no matches)"; + const joined = output.join("\n") || "(no matches)"; + return capOutputForClaude(joined, { kind: "grep" }); } -// dist/src/hooks/codex/pre-tool-use.js -var log3 = (msg) => log("codex-pre", msg); -var MEMORY_PATH = join3(homedir3(), ".deeplake", "memory"); +// dist/src/hooks/virtual-table-query.js +function normalizeSessionPart(path, content) { + return normalizeContent(path, content); +} +function buildVirtualIndexContent(summaryRows, sessionRows = []) { + const total = summaryRows.length + sessionRows.length; + const lines = [ + "# Memory Index", + "", + `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`, + "" + ]; + if (summaryRows.length > 0) { + lines.push("## Summaries", ""); + for (const row of summaryRows) { + const path = row["path"]; + const project = row["project"] || ""; + const description = (row["description"] || "").slice(0, 120); + const date = (row["creation_date"] || "").slice(0, 10); + lines.push(`- [${path}](${path}) ${date} ${project ? 
`[${project}]` : ""} ${description}`); + } + lines.push(""); + } + if (sessionRows.length > 0) { + lines.push("## Sessions", ""); + for (const row of sessionRows) { + const path = row["path"]; + const description = (row["description"] || "").slice(0, 120); + lines.push(`- [${path}](${path}) ${description}`); + } + } + return lines.join("\n"); +} +function buildUnionQuery(memoryQuery, sessionsQuery) { + return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; +} +function buildInList(paths) { + return paths.map((path) => `'${sqlStr(path)}'`).join(", "); +} +function buildDirFilter(dirs) { + const cleaned = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; + if (cleaned.length === 0 || cleaned.includes("/")) + return ""; + const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`); + return ` WHERE ${clauses.join(" OR ")}`; +} +async function queryUnionRows(api, memoryQuery, sessionsQuery) { + const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); + try { + return await api.query(unionQuery); + } catch { + const [memoryRows, sessionRows] = await Promise.all([ + api.query(memoryQuery).catch(() => []), + api.query(sessionsQuery).catch(() => []) + ]); + return [...memoryRows, ...sessionRows]; + } +} +async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualPaths) { + const uniquePaths = [...new Set(virtualPaths)]; + const result = new Map(uniquePaths.map((path) => [path, null])); + if (uniquePaths.length === 0) + return result; + const inList = buildInList(uniquePaths); + const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 
1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); + const memoryHits = /* @__PURE__ */ new Map(); + const sessionHits = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = row["path"]; + const content = row["content"]; + const sourceOrder = Number(row["source_order"] ?? 0); + if (typeof path !== "string" || typeof content !== "string") + continue; + if (sourceOrder === 0) { + memoryHits.set(path, content); + } else { + const current = sessionHits.get(path) ?? []; + current.push(normalizeSessionPart(path, content)); + sessionHits.set(path, current); + } + } + for (const path of uniquePaths) { + if (memoryHits.has(path)) { + result.set(path, memoryHits.get(path) ?? null); + continue; + } + const sessionParts = sessionHits.get(path) ?? []; + if (sessionParts.length > 0) { + result.set(path, sessionParts.join("\n")); + } + } + if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const [summaryRows, sessionRows] = await Promise.all([ + api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []), + api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => []) + ]); + result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + } + return result; +} +async function listVirtualPathRowsForDirs(api, memoryTable, sessionsTable, dirs) { + const uniqueDirs = [...new Set(dirs.map((dir) => dir.replace(/\/+$/, "") || "/"))]; + const filter = buildDirFilter(uniqueDirs); + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}"${filter}`, `SELECT path, NULL::text AS content, size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}"${filter}`); + const deduped = dedupeRowsByPath(rows.map((row) => ({ + path: row["path"], + size_bytes: 
row["size_bytes"] + }))); + const byDir = /* @__PURE__ */ new Map(); + for (const dir of uniqueDirs) + byDir.set(dir, []); + for (const row of deduped) { + const path = row["path"]; + if (typeof path !== "string") + continue; + for (const dir of uniqueDirs) { + const prefix = dir === "/" ? "/" : `${dir}/`; + if (dir === "/" || path.startsWith(prefix)) { + byDir.get(dir)?.push(row); + } + } + } + return byDir; +} +async function readVirtualPathContent(api, memoryTable, sessionsTable, virtualPath) { + return (await readVirtualPathContents(api, memoryTable, sessionsTable, [virtualPath])).get(virtualPath) ?? null; +} +async function listVirtualPathRows(api, memoryTable, sessionsTable, dir) { + return (await listVirtualPathRowsForDirs(api, memoryTable, sessionsTable, [dir])).get(dir.replace(/\/+$/, "") || "/") ?? []; +} +async function findVirtualPaths(api, memoryTable, sessionsTable, dir, filenamePattern) { + const normalizedDir = dir.replace(/\/+$/, "") || "/"; + const likePath = `${sqlLike(normalizedDir === "/" ? "" : normalizedDir)}/%`; + const rows = await queryUnionRows(api, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`); + return [...new Set(rows.map((row) => row["path"]).filter((value) => typeof value === "string" && value.length > 0))]; +} +function dedupeRowsByPath(rows) { + const seen = /* @__PURE__ */ new Set(); + const unique = []; + for (const row of rows) { + const path = typeof row["path"] === "string" ? 
row["path"] : ""; + if (!path || seen.has(path)) + continue; + seen.add(path); + unique.push(row); + } + return unique; +} + +// dist/src/hooks/bash-command-compiler.js +function isQuoted(ch) { + return ch === "'" || ch === '"'; +} +function splitTopLevel(input, operators) { + const parts = []; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) + quote = null; + current += ch; + continue; + } + if (isQuoted(ch)) { + quote = ch; + current += ch; + continue; + } + const matched = operators.find((op) => input.startsWith(op, i)); + if (matched) { + const trimmed2 = current.trim(); + if (trimmed2) + parts.push(trimmed2); + current = ""; + i += matched.length - 1; + continue; + } + current += ch; + } + if (quote) + return null; + const trimmed = current.trim(); + if (trimmed) + parts.push(trimmed); + return parts; +} +function tokenizeShellWords(input) { + const tokens = []; + let current = ""; + let quote = null; + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"' && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } + continue; + } + if (isQuoted(ch)) { + quote = ch; + continue; + } + if (/\s/.test(ch)) { + if (current) { + tokens.push(current); + current = ""; + } + continue; + } + current += ch; + } + if (quote) + return null; + if (current) + tokens.push(current); + return tokens; +} +function expandBraceToken(token) { + const match = token.match(/\{([^{}]+)\}/); + if (!match) + return [token]; + const [expr] = match; + const prefix = token.slice(0, match.index); + const suffix = token.slice((match.index ?? 0) + expr.length); + let variants = []; + const numericRange = match[1].match(/^(-?\d+)\.\.(-?\d+)$/); + if (numericRange) { + const start = Number(numericRange[1]); + const end = Number(numericRange[2]); + const step = start <= end ? 
1 : -1; + for (let value = start; step > 0 ? value <= end : value >= end; value += step) { + variants.push(String(value)); + } + } else { + variants = match[1].split(","); + } + return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); +} +function stripAllowedModifiers(segment) { + const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + return { clean, ignoreMissing }; +} +function hasUnsupportedRedirection(segment) { + let quote = null; + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]; + if (quote) { + if (ch === quote) + quote = null; + continue; + } + if (isQuoted(ch)) { + quote = ch; + continue; + } + if (ch === ">" || ch === "<") + return true; + } + return false; +} +function parseHeadTailStage(stage) { + const tokens = tokenizeShellWords(stage); + if (!tokens || tokens.length === 0) + return null; + const [cmd, ...rest] = tokens; + if (cmd !== "head" && cmd !== "tail") + return null; + if (rest.length === 0) + return { lineLimit: 10, fromEnd: cmd === "tail" }; + if (rest.length === 1) { + const count = Number(rest[0]); + if (!Number.isFinite(count)) { + return { lineLimit: 10, fromEnd: cmd === "tail" }; + } + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && /^-\d+$/.test(rest[0])) { + const count = Number(rest[0]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 3 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) + return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + return null; +} +function 
isValidPipelineHeadTailStage(stage) { + const tokens = tokenizeShellWords(stage); + if (!tokens || tokens[0] !== "head" && tokens[0] !== "tail") + return false; + if (tokens.length === 1) + return true; + if (tokens.length === 2) + return /^-\d+$/.test(tokens[1]); + if (tokens.length === 3) + return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); + return false; +} +function parseFindNamePatterns(tokens) { + const patterns = []; + for (let i = 2; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-type") { + i += 1; + continue; + } + if (token === "-o") + continue; + if (token === "-name") { + const pattern = tokens[i + 1]; + if (!pattern) + return null; + patterns.push(pattern); + i += 1; + continue; + } + return null; + } + return patterns.length > 0 ? patterns : null; +} +function parseCompiledSegment(segment) { + const { clean, ignoreMissing } = stripAllowedModifiers(segment); + if (hasUnsupportedRedirection(clean)) + return null; + const pipeline = splitTopLevel(clean, ["|"]); + if (!pipeline || pipeline.length === 0) + return null; + const tokens = tokenizeShellWords(pipeline[0]); + if (!tokens || tokens.length === 0) + return null; + if (tokens[0] === "echo" && pipeline.length === 1) { + const text = tokens.slice(1).join(" "); + return { kind: "echo", text }; + } + if (tokens[0] === "cat") { + const paths = tokens.slice(1).flatMap(expandBraceToken); + if (paths.length === 0) + return null; + let lineLimit = 0; + let fromEnd = false; + let countLines2 = false; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const pipeStage = pipeline[1].trim(); + if (/^wc\s+-l\s*$/.test(pipeStage)) { + if (paths.length !== 1) + return null; + countLines2 = true; + } else { + if (!isValidPipelineHeadTailStage(pipeStage)) + return null; + const headTail = parseHeadTailStage(pipeStage); + if (!headTail) + return null; + lineLimit = headTail.lineLimit; + fromEnd = headTail.fromEnd; + } + } + return { kind: "cat", paths, lineLimit, 
fromEnd, countLines: countLines2, ignoreMissing }; + } + if (tokens[0] === "head" || tokens[0] === "tail") { + if (pipeline.length !== 1) + return null; + const parsed = parseHeadTailStage(clean); + if (!parsed) + return null; + const headTokens = tokenizeShellWords(clean); + if (!headTokens) + return null; + if (headTokens[1] === "-n" && headTokens.length < 4 || /^-\d+$/.test(headTokens[1] ?? "") && headTokens.length < 3 || headTokens.length === 2 && /^-?\d+$/.test(headTokens[1] ?? "")) + return null; + const path = headTokens[headTokens.length - 1]; + if (path === "head" || path === "tail" || path === "-n") + return null; + return { + kind: "cat", + paths: expandBraceToken(path), + lineLimit: parsed.lineLimit, + fromEnd: parsed.fromEnd, + countLines: false, + ignoreMissing + }; + } + if (tokens[0] === "wc" && tokens[1] === "-l" && pipeline.length === 1 && tokens[2]) { + return { + kind: "cat", + paths: expandBraceToken(tokens[2]), + lineLimit: 0, + fromEnd: false, + countLines: true, + ignoreMissing + }; + } + if (tokens[0] === "ls" && pipeline.length === 1) { + const dirs = tokens.slice(1).filter((token) => !token.startsWith("-")).flatMap(expandBraceToken); + const longFormat = tokens.some((token) => token.startsWith("-") && token.includes("l")); + return { kind: "ls", dirs: dirs.length > 0 ? 
dirs : ["/"], longFormat }; + } + if (tokens[0] === "find") { + if (pipeline.length > 3) + return null; + const dir = tokens[1]; + if (!dir) + return null; + const patterns = parseFindNamePatterns(tokens); + if (!patterns) + return null; + const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); + if (countOnly) { + if (patterns.length !== 1) + return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + if (pipeline.length >= 2) { + const xargsTokens = tokenizeShellWords(pipeline[1].trim()); + if (!xargsTokens || xargsTokens[0] !== "xargs") + return null; + const xargsArgs = xargsTokens.slice(1); + while (xargsArgs[0] && xargsArgs[0].startsWith("-")) { + if (xargsArgs[0] === "-r") { + xargsArgs.shift(); + continue; + } + return null; + } + const grepCmd = xargsArgs.join(" "); + const grepParams2 = parseBashGrep(grepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 3) { + const headStage = pipeline[2].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } + if (patterns.length !== 1) + return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + const grepParams = parseBashGrep(clean); + if (grepParams) { + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "grep", params: grepParams, lineLimit }; + } + return null; +} +function parseCompiledBashCommand(cmd) { + if (cmd.includes("||")) + return null; + const segments = 
splitTopLevel(cmd, ["&&", ";", "\n"]); + if (!segments || segments.length === 0) + return null; + const parsed = segments.map(parseCompiledSegment); + if (parsed.some((segment) => segment === null)) + return null; + return parsed; +} +function applyLineWindow(content, lineLimit, fromEnd) { + if (lineLimit <= 0) + return content; + const lines = content.split("\n"); + return (fromEnd ? lines.slice(-lineLimit) : lines.slice(0, lineLimit)).join("\n"); +} +function countLines(content) { + return content === "" ? 0 : content.split("\n").length; +} +function renderDirectoryListing(dir, rows, longFormat) { + const entries = /* @__PURE__ */ new Map(); + const prefix = dir === "/" ? "/" : `${dir}/`; + for (const row of rows) { + const path = row["path"]; + if (!path.startsWith(prefix) && dir !== "/") + continue; + const rest = dir === "/" ? path.slice(1) : path.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? rest : rest.slice(0, slash); + if (!name) + continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) + entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: Number(row["size_bytes"] ?? 0) }); + } + } + if (entries.size === 0) + return `ls: cannot access '${dir}': No such file or directory`; + const lines = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? 
"/" : "")); + } + } + return lines.join("\n"); +} +async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, deps = {}) { + const { readVirtualPathContentsFn = readVirtualPathContents, listVirtualPathRowsForDirsFn = listVirtualPathRowsForDirs, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect } = deps; + const plan = parseCompiledBashCommand(cmd); + if (!plan) + return null; + const readPaths = [...new Set(plan.flatMap((segment) => segment.kind === "cat" ? segment.paths : []))]; + const listDirs = [...new Set(plan.flatMap((segment) => segment.kind === "ls" ? segment.dirs.map((dir) => dir.replace(/\/+$/, "") || "/") : []))]; + const contentMap = readPaths.length > 0 ? await readVirtualPathContentsFn(api, memoryTable, sessionsTable, readPaths) : /* @__PURE__ */ new Map(); + const dirRowsMap = listDirs.length > 0 ? await listVirtualPathRowsForDirsFn(api, memoryTable, sessionsTable, listDirs) : /* @__PURE__ */ new Map(); + const outputs = []; + for (const segment of plan) { + if (segment.kind === "echo") { + outputs.push(segment.text); + continue; + } + if (segment.kind === "cat") { + const contents = []; + for (const path of segment.paths) { + const content = contentMap.get(path) ?? null; + if (content === null) { + if (segment.ignoreMissing) + continue; + return null; + } + contents.push(content); + } + const combined = contents.join(""); + if (segment.countLines) { + outputs.push(`${countLines(combined)} ${segment.paths[0]}`); + } else { + outputs.push(applyLineWindow(combined, segment.lineLimit, segment.fromEnd)); + } + continue; + } + if (segment.kind === "ls") { + for (const dir of segment.dirs) { + outputs.push(renderDirectoryListing(dir.replace(/\/+$/, "") || "/", dirRowsMap.get(dir.replace(/\/+$/, "") || "/") ?? 
[], segment.longFormat)); + } + continue; + } + if (segment.kind === "find") { + const filenamePattern = sqlLike(segment.pattern).replace(/\*/g, "%").replace(/\?/g, "_"); + const paths = await findVirtualPathsFn(api, memoryTable, sessionsTable, segment.dir.replace(/\/+$/, "") || "/", filenamePattern); + outputs.push(segment.countOnly ? String(paths.length) : paths.join("\n") || "(no matches)"); + continue; + } + if (segment.kind === "find_grep") { + const dir = segment.dir.replace(/\/+$/, "") || "/"; + const candidateBatches = await Promise.all(segment.patterns.map((pattern) => findVirtualPathsFn(api, memoryTable, sessionsTable, dir, sqlLike(pattern).replace(/\*/g, "%").replace(/\?/g, "_")))); + const candidatePaths = [...new Set(candidateBatches.flat())]; + if (candidatePaths.length === 0) { + outputs.push("(no matches)"); + continue; + } + const candidateContents = await readVirtualPathContentsFn(api, memoryTable, sessionsTable, candidatePaths); + const matched = refineGrepMatches(candidatePaths.flatMap((path) => { + const content = candidateContents.get(path); + if (content === null || content === void 0) + return []; + return [{ path, content: normalizeContent(path, content) }]; + }), segment.params); + const limited = segment.lineLimit > 0 ? 
matched.slice(0, segment.lineLimit) : matched; + outputs.push(limited.join("\n") || "(no matches)"); + continue; + } + if (segment.kind === "grep") { + const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); + if (result === null) + return null; + if (segment.lineLimit > 0) { + outputs.push(result.split("\n").slice(0, segment.lineLimit).join("\n")); + } else { + outputs.push(result); + } + continue; + } + } + return capOutputForClaude(outputs.join("\n"), { kind: "bash" }); +} + +// dist/src/hooks/query-cache.js +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var log3 = (msg) => log("query-cache", msg); +var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", "query-cache"); +var INDEX_CACHE_FILE = "index.md"; +function getSessionQueryCacheDir(sessionId, deps = {}) { + const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; + return join4(cacheRoot, sessionId); +} +function readCachedIndexContent(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + } catch (e) { + if (e?.code === "ENOENT") + return null; + logFn(`read failed for session=${sessionId}: ${e.message}`); + return null; + } +} +function writeCachedIndexContent(sessionId, content, deps = {}) { + const { logFn = log3 } = deps; + try { + const dir = getSessionQueryCacheDir(sessionId, deps); + mkdirSync2(dir, { recursive: true }); + writeFileSync2(join4(dir, INDEX_CACHE_FILE), content, "utf-8"); + } catch (e) { + logFn(`write failed for session=${sessionId}: ${e.message}`); + } +} + +// dist/src/utils/direct-run.js +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +function isDirectRun(metaUrl) { + const entry = process.argv[1]; + if (!entry) + return false; + try { + return 
resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} + +// dist/src/hooks/memory-path-utils.js +import { homedir as homedir4 } from "node:os"; +import { join as join5 } from "node:path"; +var MEMORY_PATH = join5(homedir4(), ".deeplake", "memory"); var TILDE_PATH = "~/.deeplake/memory"; var HOME_VAR_PATH = "$HOME/.deeplake/memory"; -var __bundleDir = dirname(fileURLToPath(import.meta.url)); -var SHELL_BUNDLE = existsSync2(join3(__bundleDir, "shell", "deeplake-shell.js")) ? join3(__bundleDir, "shell", "deeplake-shell.js") : join3(__bundleDir, "..", "shell", "deeplake-shell.js"); var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "cat", "ls", @@ -833,146 +1837,173 @@ function isSafe(cmd) { } return true; } -function touchesMemory(cmd) { - return cmd.includes(MEMORY_PATH) || cmd.includes(TILDE_PATH) || cmd.includes(HOME_VAR_PATH); +function touchesMemory(p) { + return p.includes(MEMORY_PATH) || p.includes(TILDE_PATH) || p.includes(HOME_VAR_PATH); } function rewritePaths(cmd) { return cmd.replace(new RegExp(MEMORY_PATH.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/?", "g"), "/").replace(/~\/.deeplake\/memory\/?/g, "/").replace(/\$HOME\/.deeplake\/memory\/?/g, "/").replace(/"\$HOME\/.deeplake\/memory\/?"/g, '"/"'); } -function blockWithContent(content) { - process.stderr.write(content); - process.exit(2); + +// dist/src/hooks/codex/pre-tool-use.js +var log4 = (msg) => log("codex-pre", msg); +var __bundleDir = dirname(fileURLToPath2(import.meta.url)); +var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); +function buildUnsupportedGuidance() { + return "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. 
Rewrite your command using only bash tools and retry."; } -function runVirtualShell(cmd) { +function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { try { - return execFileSync("node", [SHELL_BUNDLE, "-c", cmd], { + return execFileSync("node", [shellBundle, "-c", cmd], { encoding: "utf-8", timeout: 1e4, env: { ...process.env }, stdio: ["pipe", "pipe", "pipe"] - // capture stderr instead of inheriting }).trim(); } catch (e) { - log3(`virtual shell failed: ${e.message}`); + logFn(`virtual shell failed: ${e.message}`); return ""; } } -async function main() { - const input = await readStdin(); +function buildIndexContent(rows) { + const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; + for (const row of rows) { + const path = row["path"]; + const project = row["project"] || ""; + const description = (row["description"] || "").slice(0, 120); + const date = (row["creation_date"] || "").slice(0, 10); + lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); + } + return lines.join("\n"); +} +async function processCodexPreToolUse(input, deps = {}) { + const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input?.command ?? 
""; - log3(`hook fired: cmd=${cmd}`); + logFn(`hook fired: cmd=${cmd}`); if (!touchesMemory(cmd)) - return; + return { action: "pass" }; const rewritten = rewritePaths(cmd); if (!isSafe(rewritten)) { - const guidance = "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry."; - log3(`unsupported command, returning guidance: ${rewritten}`); - process.stdout.write(guidance); - process.exit(0); + const guidance = buildUnsupportedGuidance(); + logFn(`unsupported command, returning guidance: ${rewritten}`); + return { + action: "guide", + output: guidance, + rewrittenCommand: rewritten + }; } - const config = loadConfig(); if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); + const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; + const api = createApi(table, config); + const readVirtualPathContentsWithCache = async (cachePaths) => { + const uniquePaths = [...new Set(cachePaths)]; + const result2 = new Map(uniquePaths.map((path) => [path, null])); + const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const remainingPaths = cachedIndex === null ? 
uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); + if (cachedIndex !== null) { + result2.set("/index.md", cachedIndex); + } + if (remainingPaths.length > 0) { + const fetched = await readVirtualPathContentsFn(api, table, sessionsTable, remainingPaths); + for (const [path, content] of fetched) + result2.set(path, content); + } + const fetchedIndex = result2.get("/index.md"); + if (typeof fetchedIndex === "string") { + writeCachedIndexContentFn(input.session_id, fetchedIndex); + } + return result2; + }; try { - { - let virtualPath = null; - let lineLimit = 0; - let fromEnd = false; - const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); - const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); - if (catPipeHead) { - virtualPath = catPipeHead[1]; - lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); - } - if (!virtualPath) { - const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); - if (catMatch) - virtualPath = catMatch[1]; - } - if (!virtualPath) { - const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
rewritten.match(/^head\s+(\S+)\s*$/); - if (headMatch) { - if (headMatch[2]) { - virtualPath = headMatch[2]; - lineLimit = Math.abs(parseInt(headMatch[1], 10)); - } else { - virtualPath = headMatch[1]; - lineLimit = 10; - } + const compiled = await executeCompiledBashCommandFn(api, table, sessionsTable, rewritten, { + readVirtualPathContentsFn: async (_api, _memoryTable, _sessionsTable, cachePaths) => readVirtualPathContentsWithCache(cachePaths) + }); + if (compiled !== null) { + return { action: "block", output: compiled, rewrittenCommand: rewritten }; + } + let virtualPath = null; + let lineLimit = 0; + let fromEnd = false; + const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { + virtualPath = catPipeHead[1]; + lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); + } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) + virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? rewritten.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { + virtualPath = headMatch[2]; + lineLimit = Math.abs(parseInt(headMatch[1], 10)); + } else { + virtualPath = headMatch[1]; + lineLimit = 10; } } - if (!virtualPath) { - const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? rewritten.match(/^tail\s+(\S+)\s*$/); - if (tailMatch) { - fromEnd = true; - if (tailMatch[2]) { - virtualPath = tailMatch[2]; - lineLimit = Math.abs(parseInt(tailMatch[1], 10)); - } else { - virtualPath = tailMatch[1]; - lineLimit = 10; - } + } + if (!virtualPath) { + const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
rewritten.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { + virtualPath = tailMatch[2]; + lineLimit = Math.abs(parseInt(tailMatch[1], 10)); + } else { + virtualPath = tailMatch[1]; + lineLimit = 10; } } - if (!virtualPath) { - const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); - if (wcMatch) { - virtualPath = wcMatch[1]; - lineLimit = -1; - } + } + if (!virtualPath) { + const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { + virtualPath = wcMatch[1]; + lineLimit = -1; } - if (virtualPath && !virtualPath.endsWith("/")) { - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; - const isSession = virtualPath.startsWith("/sessions/"); - log3(`direct read: ${virtualPath}`); - let content = null; - if (isSession) { - const rows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["content"]) - content = rows[0]["content"]; - } else { - const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["summary"]) { - content = rows[0]["summary"]; - } else if (virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"]; - const proj = r["project"] || ""; - const desc = (r["description"] || "").slice(0, 120); - const date = (r["creation_date"] || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); - } + } + if (virtualPath && !virtualPath.endsWith("/")) { + logFn(`direct read: ${virtualPath}`); + let content = virtualPath === "/index.md" ? 
readCachedIndexContentFn(input.session_id) : null; + if (content === null) { + content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); + } + if (content === null && virtualPath === "/index.md") { + const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); + content = buildIndexContent(idxRows); + } + if (content !== null) { + if (virtualPath === "/index.md") { + writeCachedIndexContentFn(input.session_id, content); } - if (content !== null) { - if (lineLimit === -1) { - blockWithContent(`${content.split("\n").length} ${virtualPath}`); - } - if (lineLimit > 0) { - const lines = content.split("\n"); - content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); - } - blockWithContent(content); + if (lineLimit === -1) { + return { action: "block", output: `${content.split("\n").length} ${virtualPath}`, rewrittenCommand: rewritten }; } + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + return { action: "block", output: content, rewrittenCommand: rewritten }; } } const lsMatch = rewritten.match(/^ls\s+(?:-[a-zA-Z]+\s+)*(\S+)?\s*$/); if (lsMatch) { const dir = (lsMatch[1] ?? "/").replace(/\/+$/, "") || "/"; const isLong = /\s-[a-zA-Z]*l/.test(rewritten); - log3(`direct ls: ${dir}`); - const rows = await api.query(`SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path`); + logFn(`direct ls: ${dir}`); + const rows = await listVirtualPathRowsFn(api, table, sessionsTable, dir); const entries = /* @__PURE__ */ new Map(); - const prefix = dir === "/" ? "/" : dir + "/"; + const prefix = dir === "/" ? 
"/" : `${dir}/`; for (const row of rows) { - const p = row["path"]; - if (!p.startsWith(prefix) && dir !== "/") + const path = row["path"]; + if (!path.startsWith(prefix) && dir !== "/") continue; - const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const rest = dir === "/" ? path.slice(1) : path.slice(prefix.length); const slash = rest.indexOf("/"); const name = slash === -1 ? rest : rest.slice(0, slash); if (!name) @@ -996,50 +2027,74 @@ async function main() { lines.push(name + (info.isDir ? "/" : "")); } } - blockWithContent(lines.join("\n")); - } else { - blockWithContent(`ls: cannot access '${dir}': No such file or directory`); + return { action: "block", output: lines.join("\n"), rewrittenCommand: rewritten }; } + return { + action: "block", + output: `ls: cannot access '${dir}': No such file or directory`, + rewrittenCommand: rewritten + }; } - { - const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); - if (findMatch) { - const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const findTable = isSessionDir ? sessionsTable : table; - log3(`direct find: ${dir} -name '${findMatch[2]}'`); - const rows = await api.query(`SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? 
"" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path`); - let result2 = rows.map((r) => r["path"]).join("\n") || ""; - if (/\|\s*wc\s+-l\s*$/.test(rewritten)) { - result2 = String(rows.length); - } - blockWithContent(result2 || "(no matches)"); - } + const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + logFn(`direct find: ${dir} -name '${findMatch[2]}'`); + const paths = await findVirtualPathsFn(api, table, sessionsTable, dir, namePattern); + let result2 = paths.join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(rewritten)) + result2 = String(paths.length); + return { + action: "block", + output: result2 || "(no matches)", + rewrittenCommand: rewritten + }; } const grepParams = parseBashGrep(rewritten); if (grepParams) { - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? 
"sessions"; - log3(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); - const result2 = await handleGrepDirect(api, table, sessionsTable, grepParams); + logFn(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result2 = await handleGrepDirectFn(api, table, sessionsTable, grepParams); if (result2 !== null) { - blockWithContent(result2); + return { action: "block", output: result2, rewrittenCommand: rewritten }; } } } catch (e) { - log3(`direct query failed, falling back to shell: ${e.message}`); + logFn(`direct query failed, falling back to shell: ${e.message}`); } } - log3(`intercepted \u2192 running via virtual shell: ${rewritten}`); - const result = runVirtualShell(rewritten); - if (result) { - blockWithContent(result); - } else { - blockWithContent("[Deeplake Memory] Command returned empty or the file does not exist in cloud storage."); + logFn(`intercepted \u2192 running via virtual shell: ${rewritten}`); + const result = runVirtualShellFn(rewritten, shellBundle, logFn); + return { + action: "block", + output: result || "[Deeplake Memory] Command returned empty or the file does not exist in cloud storage.", + rewrittenCommand: rewritten + }; +} +async function main() { + const input = await readStdin(); + const decision = await processCodexPreToolUse(input); + if (decision.action === "pass") + return; + if (decision.action === "guide") { + if (decision.output) + process.stdout.write(decision.output); + process.exit(0); } + if (decision.output) + process.stderr.write(decision.output); + process.exit(2); } -main().catch((e) => { - log3(`fatal: ${e.message}`); - process.exit(0); -}); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { + log4(`fatal: ${e.message}`); + process.exit(0); + }); +} +export { + buildUnsupportedGuidance, + isSafe, + processCodexPreToolUse, + rewritePaths, + runVirtualShell, + touchesMemory +}; diff --git a/codex/bundle/session-start-setup.js 
b/codex/bundle/session-start-setup.js index 02be970..21609fa 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -2,7 +2,7 @@ // dist/src/hooks/codex/session-start-setup.js import { fileURLToPath } from "node:url"; -import { dirname as dirname2, join as join6 } from "node:path"; +import { dirname as dirname2, join as join7 } from "node:path"; import { execSync as execSync2 } from "node:child_process"; import { homedir as homedir4 } from "node:os"; @@ -66,6 +66,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -90,27 +93,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? 
process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await 
sleep(delay); @@ -263,8 +295,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join4(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync3(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync2(getIndexMarkerDir(), { recursive: true }); + writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). 
*/ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -275,22 +360,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? []).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. 
*/ async ensureTable(name) { @@ -300,6 +388,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ @@ -309,7 +399,10 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; @@ -331,22 +424,22 @@ function readStdin() { } // dist/src/utils/version-check.js -import { readFileSync as readFileSync3 } from "node:fs"; -import { dirname, join as join4 } from "node:path"; +import { readFileSync as readFileSync4 } from "node:fs"; +import { dirname, join as join5 } from "node:path"; var GITHUB_RAW_PKG = 
"https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; function getInstalledVersion(bundleDir, pluginManifestDir) { try { - const pluginJson = join4(bundleDir, "..", pluginManifestDir, "plugin.json"); - const plugin = JSON.parse(readFileSync3(pluginJson, "utf-8")); + const pluginJson = join5(bundleDir, "..", pluginManifestDir, "plugin.json"); + const plugin = JSON.parse(readFileSync4(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join4(dir, "package.json"); + const candidate = join5(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync4(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { @@ -377,15 +470,15 @@ function isNewer(latest, current) { } // dist/src/utils/wiki-log.js -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join5 } from "node:path"; +import { mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join6 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join5(hooksDir, filename); + const path = join6(hooksDir, filename); return { path, log(msg) { try { - mkdirSync2(hooksDir, { recursive: true }); + mkdirSync3(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -397,7 +490,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/codex/session-start-setup.js var log3 = (msg) => log("codex-session-setup", msg); var __bundleDir = dirname2(fileURLToPath(import.meta.url)); -var { log: wikiLog } = makeWikiLogger(join6(homedir4(), ".codex", "hooks")); +var { log: wikiLog } = makeWikiLogger(join7(homedir4(), ".codex", "hooks")); async function 
createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { const summaryPath = `/summaries/${userName}/${sessionId}.md`; const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 2d0b237..0793149 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -46081,14 +46081,14 @@ var require_turndown_cjs = __commonJS({ } else if (node.nodeType === 1) { replacement = replacementForNode.call(self2, node); } - return join6(output, replacement); + return join7(output, replacement); }, ""); } function postProcess(output) { var self2 = this; this.rules.forEach(function(rule) { if (typeof rule.append === "function") { - output = join6(output, rule.append(self2.options)); + output = join7(output, rule.append(self2.options)); } }); return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, ""); @@ -46100,7 +46100,7 @@ var require_turndown_cjs = __commonJS({ if (whitespace.leading || whitespace.trailing) content = content.trim(); return whitespace.leading + rule.replacement(content, node, this.options) + whitespace.trailing; } - function join6(output, replacement) { + function join7(output, replacement) { var s12 = trimTrailingNewlines(output); var s22 = trimLeadingNewlines(replacement); var nls = Math.max(output.length - s12.length, replacement.length - s22.length); @@ -66758,6 +66758,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join6 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -66782,27 +66785,48 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = 
(process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms3) { return new Promise((resolve5) => setTimeout(resolve5, ms3)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i11) => [col, row[i11]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -66955,8 +66987,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join6(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync3(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e6.message}`); + } + } /** List all tables in the workspace (with retry). */ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -66967,22 +67052,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t6) => t6.table_name); + return { + tables: (data.tables ?? 
[]).map((t6) => t6.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ async ensureTable(name) { @@ -66992,6 +67080,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ @@ -67001,673 +67091,1090 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; // dist/src/shell/deeplake-fs.js import { basename as basename4, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; -var BATCH_SIZE = 10; -var FLUSH_DEBOUNCE_MS = 200; -function normPath(p22) { - const r10 = posix.normalize(p22.startsWith("/") ? p22 : "/" + p22); - return r10 === "/" ? r10 : r10.replace(/\/$/, ""); + +// dist/src/shell/grep-core.js +var TOOL_INPUT_FIELDS = [ + "command", + "file_path", + "path", + "pattern", + "prompt", + "subagent_type", + "query", + "url", + "notebook_path", + "old_string", + "new_string", + "content", + "skill", + "args", + "taskId", + "status", + "subject", + "description", + "to", + "message", + "summary", + "max_results" +]; +var TOOL_RESPONSE_DROP = /* @__PURE__ */ new Set([ + // Note: `stderr` is intentionally NOT in this set. The `stdout` high-signal + // branch below already de-dupes it for the common case (appends as suffix + // when non-empty). If a tool response has ONLY `stderr` and no `stdout` + // (hard-failure on some tools), the generic cleanup preserves it so the + // error message reaches Claude instead of collapsing to `[ok]`. 
+ "interrupted", + "isImage", + "noOutputExpected", + "type", + "structuredPatch", + "userModified", + "originalFile", + "replaceAll", + "totalDurationMs", + "totalTokens", + "totalToolUseCount", + "usage", + "toolStats", + "durationMs", + "durationSeconds", + "bytes", + "code", + "codeText", + "agentId", + "agentType", + "verificationNudgeNeeded", + "numLines", + "numFiles", + "truncated", + "statusChange", + "updatedFields", + "isAgent", + "success" +]); +function maybeParseJson(v27) { + if (typeof v27 !== "string") + return v27; + const s10 = v27.trim(); + if (s10[0] !== "{" && s10[0] !== "[") + return v27; + try { + return JSON.parse(s10); + } catch { + return v27; + } } -function parentOf(p22) { - const i11 = p22.lastIndexOf("/"); - return i11 <= 0 ? "/" : p22.slice(0, i11); +function snakeCase(k17) { + return k17.replace(/([A-Z])/g, "_$1").toLowerCase(); } -function guessMime(filename) { - const ext2 = filename.split(".").pop()?.toLowerCase() ?? ""; - return { - json: "application/json", - md: "text/markdown", - txt: "text/plain", - js: "text/javascript", - ts: "text/typescript", - html: "text/html", - css: "text/css" - }[ext2] ?? "text/plain"; +function camelCase(k17) { + return k17.replace(/_([a-z])/g, (_16, c15) => c15.toUpperCase()); } -function fsErr(code, msg, path2) { - return Object.assign(new Error(`${code}: ${msg}, '${path2}'`), { code }); +function formatToolInput(raw) { + const p22 = maybeParseJson(raw); + if (typeof p22 !== "object" || p22 === null) + return String(p22 ?? ""); + const parts = []; + for (const k17 of TOOL_INPUT_FIELDS) { + if (p22[k17] === void 0) + continue; + const v27 = p22[k17]; + parts.push(`${k17}: ${typeof v27 === "string" ? v27 : JSON.stringify(v27)}`); + } + for (const k17 of ["glob", "output_mode", "limit", "offset"]) { + if (p22[k17] !== void 0) + parts.push(`${k17}: ${p22[k17]}`); + } + return parts.length ? 
parts.join("\n") : JSON.stringify(p22); } -var DeeplakeFs = class _DeeplakeFs { - client; - table; - mountPoint; - // path → Buffer (content) or null (exists but not fetched yet) - files = /* @__PURE__ */ new Map(); - meta = /* @__PURE__ */ new Map(); - // dir path → Set of immediate child names - dirs = /* @__PURE__ */ new Map(); - // batched writes pending SQL flush - pending = /* @__PURE__ */ new Map(); - // paths that have been flushed (INSERT) at least once — subsequent flushes use UPDATE - flushed = /* @__PURE__ */ new Set(); - /** Number of files loaded from the server during bootstrap. */ - get fileCount() { - return this.files.size; +function formatToolResponse(raw, inp, toolName) { + const r10 = maybeParseJson(raw); + if (typeof r10 !== "object" || r10 === null) + return String(r10 ?? ""); + if (toolName === "Edit" || toolName === "Write" || toolName === "MultiEdit") { + return r10.filePath ? `[wrote ${r10.filePath}]` : "[ok]"; } - flushTimer = null; - // serialize flushes - flushChain = Promise.resolve(); - // Paths that live in the sessions table (multi-row, read by concatenation) - sessionPaths = /* @__PURE__ */ new Set(); - sessionsTable = null; - constructor(client, table, mountPoint) { - this.client = client; - this.table = table; - this.mountPoint = mountPoint; - this.dirs.set(mountPoint, /* @__PURE__ */ new Set()); - if (mountPoint !== "/") - this.dirs.set("/", /* @__PURE__ */ new Set([mountPoint.slice(1)])); + if (typeof r10.stdout === "string") { + const stderr = r10.stderr; + return r10.stdout + (stderr ? ` +stderr: ${stderr}` : ""); } - static async create(client, table, mount = "/memory", sessionsTable) { - const fs3 = new _DeeplakeFs(client, table, mount); - fs3.sessionsTable = sessionsTable ?? 
null; - await client.ensureTable(); - let sessionSyncOk = true; - const memoryBootstrap = (async () => { - const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; - try { - const rows = await client.query(sql); - for (const row of rows) { - const p22 = row["path"]; - fs3.files.set(p22, null); - fs3.meta.set(p22, { - size: Number(row["size_bytes"] ?? 0), - mime: row["mime_type"] ?? "application/octet-stream", - mtime: /* @__PURE__ */ new Date() - }); - fs3.addToTree(p22); - fs3.flushed.add(p22); - } - } catch { - } - })(); - const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => { - try { - const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`); - for (const row of sessionRows) { - const p22 = row["path"]; - if (!fs3.files.has(p22)) { - fs3.files.set(p22, null); - fs3.meta.set(p22, { - size: Number(row["total_size"] ?? 0), - mime: "application/x-ndjson", - mtime: /* @__PURE__ */ new Date() - }); - fs3.addToTree(p22); - } - fs3.sessionPaths.add(p22); - } - } catch { - } - })() : Promise.resolve(); - await Promise.all([memoryBootstrap, sessionsBootstrap]); - return fs3; + if (typeof r10.content === "string") + return r10.content; + if (r10.file && typeof r10.file === "object") { + const f11 = r10.file; + if (typeof f11.content === "string") + return `[${f11.filePath ?? ""}] +${f11.content}`; + if (typeof f11.base64 === "string") + return `[binary ${f11.filePath ?? ""}: ${f11.base64.length} base64 chars]`; } - // ── tree management ─────────────────────────────────────────────────────── - addToTree(filePath) { - const segs = filePath.split("/").filter(Boolean); - for (let d15 = 0; d15 < segs.length; d15++) { - const dir = d15 === 0 ? 
"/" : "/" + segs.slice(0, d15).join("/"); - if (!this.dirs.has(dir)) - this.dirs.set(dir, /* @__PURE__ */ new Set()); - this.dirs.get(dir).add(segs[d15]); - } + if (Array.isArray(r10.filenames)) + return r10.filenames.join("\n"); + if (Array.isArray(r10.matches)) { + return r10.matches.map((m26) => typeof m26 === "string" ? m26 : JSON.stringify(m26)).join("\n"); } - removeFromTree(filePath) { - this.files.delete(filePath); - this.meta.delete(filePath); - this.pending.delete(filePath); - this.flushed.delete(filePath); - const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + if (Array.isArray(r10.results)) { + return r10.results.map((x28) => typeof x28 === "string" ? x28 : x28?.title ?? x28?.url ?? JSON.stringify(x28)).join("\n"); } - // ── flush / write batching ──────────────────────────────────────────────── - scheduleFlush() { - if (this.flushTimer !== null) - return; - this.flushTimer = setTimeout(() => { - this.flush().catch(() => { - }); - }, FLUSH_DEBOUNCE_MS); + const inpObj = maybeParseJson(inp); + const kept = {}; + for (const [k17, v27] of Object.entries(r10)) { + if (TOOL_RESPONSE_DROP.has(k17)) + continue; + if (v27 === "" || v27 === false || v27 == null) + continue; + if (typeof inpObj === "object" && inpObj) { + const inObj = inpObj; + if (k17 in inObj && JSON.stringify(inObj[k17]) === JSON.stringify(v27)) + continue; + const snake = snakeCase(k17); + if (snake in inObj && JSON.stringify(inObj[snake]) === JSON.stringify(v27)) + continue; + const camel = camelCase(k17); + if (camel in inObj && JSON.stringify(inObj[camel]) === JSON.stringify(v27)) + continue; + } + kept[k17] = v27; } - async flush() { - this.flushChain = this.flushChain.then(() => this._doFlush()); - return this.flushChain; + return Object.keys(kept).length ? JSON.stringify(kept) : "[ok]"; +} +function formatToolCall(obj) { + return `[tool:${obj?.tool_name ?? 
"?"}] +input: ${formatToolInput(obj?.tool_input)} +response: ${formatToolResponse(obj?.tool_response, obj?.tool_input, obj?.tool_name)}`; +} +function normalizeContent(path2, raw) { + if (!path2.includes("/sessions/")) + return raw; + if (!raw || raw[0] !== "{") + return raw; + let obj; + try { + obj = JSON.parse(raw); + } catch { + return raw; } - async _doFlush() { - if (this.pending.size === 0) - return; - if (this.flushTimer !== null) { - clearTimeout(this.flushTimer); - this.flushTimer = null; - } - const rows = [...this.pending.values()]; - this.pending.clear(); - const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10))); - let failures = 0; - for (let i11 = 0; i11 < results.length; i11++) { - if (results[i11].status === "rejected") { - if (!this.pending.has(rows[i11].path)) { - this.pending.set(rows[i11].path, rows[i11]); - } - failures++; - } - } - if (failures > 0) { - throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); + if (Array.isArray(obj.turns)) { + const header = []; + if (obj.date_time) + header.push(`date: ${obj.date_time}`); + if (obj.speakers) { + const s10 = obj.speakers; + const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); + if (names) + header.push(`speakers: ${names}`); } + const lines = obj.turns.map((t6) => { + const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); + const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); + const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; + return `${tag}${sp}: ${tx}`; + }); + const out2 = [...header, ...lines].join("\n"); + return out2.trim() ? out2 : raw; } - async upsertRow(r10) { - const text = sqlStr(r10.contentText); - const p22 = sqlStr(r10.path); - const fname = sqlStr(r10.filename); - const mime = sqlStr(r10.mimeType); - const ts3 = (/* @__PURE__ */ new Date()).toISOString(); - const cd = r10.creationDate ?? ts3; - const lud = r10.lastUpdateDate ?? 
ts3; - if (this.flushed.has(r10.path)) { - let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`; - if (r10.project !== void 0) - setClauses += `, project = '${sqlStr(r10.project)}'`; - if (r10.description !== void 0) - setClauses += `, description = '${sqlStr(r10.description)}'`; - await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`); - } else { - const id = randomUUID2(); - const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : ""); - const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : ""); - await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`); - this.flushed.add(r10.path); - } + const stripRecalled = (t6) => { + const i11 = t6.indexOf(""); + if (i11 === -1) + return t6; + const j14 = t6.lastIndexOf(""); + if (j14 === -1 || j14 < i11) + return t6; + const head = t6.slice(0, i11); + const tail = t6.slice(j14 + "".length); + return (head + tail).replace(/^\s+/, "").replace(/\n{3,}/g, "\n\n"); + }; + let out = null; + if (obj.type === "user_message") { + out = `[user] ${stripRecalled(String(obj.content ?? ""))}`; + } else if (obj.type === "assistant_message") { + const agent = obj.agent_type ? ` (agent=${obj.agent_type})` : ""; + out = `[assistant${agent}] ${stripRecalled(String(obj.content ?? 
""))}`; + } else if (obj.type === "tool_call") { + out = formatToolCall(obj); } - // ── Virtual index.md generation ──────────────────────────────────────────── - async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } + if (out === null) + return raw; + const trimmed = out.trim(); + if (!trimmed || trimmed === "[user]" || trimmed === "[assistant]" || /^\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}$/.test(trimmed)) + return raw; + return out; +} +function buildPathCondition(targetPath) { + if (!targetPath || targetPath === "/") + return ""; + const clean = targetPath.replace(/\/+$/, ""); + if (/[*?]/.test(clean)) { + const likePattern = sqlLike(clean).replace(/\*/g, "%").replace(/\?/g, "_"); + return `path LIKE '${likePattern}' ESCAPE '\\'`; + } + const base = clean.split("/").pop() ?? ""; + if (base.includes(".")) { + return `path = '${sqlStr(clean)}'`; + } + return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; +} +async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; + const limit = opts.limit ?? 100; + const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; + const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + return rows.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); +} +function buildPathFilter(targetPath) { + const condition = buildPathCondition(targetPath); + return condition ? ` AND ${condition}` : ""; +} +function buildPathFilterForTargets(targetPaths) { + if (targetPaths.some((targetPath) => !targetPath || targetPath === "/")) + return ""; + const conditions = [...new Set(targetPaths.map((targetPath) => buildPathCondition(targetPath)).filter((condition) => condition.length > 0))]; + if (conditions.length === 0) + return ""; + if (conditions.length === 1) + return ` AND ${conditions[0]}`; + return ` AND (${conditions.join(" OR ")})`; +} +function extractRegexLiteralPrefilter(pattern) { + if (!pattern) + return null; + const parts = []; + let current = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return null; + if (/[dDsSwWbBAZzGkKpP]/.test(next)) + return null; + current += next; + i11++; + continue; } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated 
| Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) + if (ch === ".") { + if (pattern[i11 + 1] === "*") { + if (current) + parts.push(current); + current = ""; + i11++; continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? `[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); + } + return null; } - lines.push(""); - return lines.join("\n"); + if ("|()[]{}+?^$".includes(ch) || ch === "*") + return null; + current += ch; } - // ── batch prefetch ──────────────────────────────────────────────────────── - /** - * Prefetch multiple files into the content cache with a single SQL query. - * Skips paths that are already cached, pending, or session-backed. - * After this call, subsequent readFile() calls for these paths hit cache. 
- */ - async prefetch(paths) { - const uncached = []; - for (const raw of paths) { - const p22 = normPath(raw); - if (this.files.get(p22) !== null && this.files.get(p22) !== void 0) - continue; - if (this.pending.has(p22)) - continue; - if (this.sessionPaths.has(p22)) - continue; - if (!this.files.has(p22)) - continue; - uncached.push(p22); - } - if (uncached.length === 0) - return; - const inList = uncached.map((p22) => `'${sqlStr(p22)}'`).join(", "); - const rows = await this.client.query(`SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})`); - for (const row of rows) { - const p22 = row["path"]; - const text = row["summary"] ?? ""; - this.files.set(p22, Buffer.from(text, "utf-8")); + if (current) + parts.push(current); + const literal = parts.reduce((best, part) => part.length > best.length ? part : best, ""); + return literal.length >= 2 ? literal : null; +} +function extractRegexAlternationPrefilters(pattern) { + if (!pattern.includes("|")) + return null; + const parts = []; + let current = ""; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + current += `\\${ch}`; + escaped = false; + continue; } - } - // ── IFileSystem: reads ──────────────────────────────────────────────────── - async readFileBuffer(path2) { - const p22 = normPath(path2); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (!this.files.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - const cached = this.files.get(p22); - if (cached !== null && cached !== void 0) - return cached; - const pend = this.pending.get(p22); - if (pend) { - const buf2 = Buffer.from(pend.contentText, "utf-8"); - this.files.set(p22, buf2); - return buf2; + if (ch === "\\") { + escaped = true; + continue; } - if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM 
"${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); - if (rows2.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text = rows2.map((r10) => typeof r10["message"] === "string" ? r10["message"] : JSON.stringify(r10["message"])).join("\n"); - const buf2 = Buffer.from(text, "utf-8"); - this.files.set(p22, buf2); - return buf2; + if (ch === "|") { + if (!current) + return null; + parts.push(current); + current = ""; + continue; } - const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); - if (rows.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const buf = Buffer.from(rows[0]["summary"] ?? "", "utf-8"); - this.files.set(p22, buf); - return buf; + if ("()[]{}^$".includes(ch)) + return null; + current += ch; } - async readFile(path2, _opts) { - const p22 = normPath(path2); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { - const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); - if (realRows.length > 0 && realRows[0]["summary"]) { - const text2 = realRows[0]["summary"]; - const buf2 = Buffer.from(text2, "utf-8"); - this.files.set(p22, buf2); - return text2; + if (escaped || !current) + return null; + parts.push(current); + const literals = [...new Set(parts.map((part) => extractRegexLiteralPrefilter(part)).filter((part) => typeof part === "string" && part.length >= 2))]; + return literals.length > 0 ? literals : null; +} +function buildGrepSearchOptions(params, targetPath) { + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; + const alternationPrefilters = hasRegexMeta ? 
extractRegexAlternationPrefilters(params.pattern) : null; + return { + pathFilter: buildPathFilter(targetPath), + contentScanOnly: hasRegexMeta, + likeOp: params.ignoreCase ? "ILIKE" : "LIKE", + escapedPattern: sqlLike(params.pattern), + prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + }; +} +function buildContentFilter(column, likeOp, patterns) { + if (patterns.length === 0) + return ""; + if (patterns.length === 1) + return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; + return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function compileGrepRegex(params) { + let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + if (params.wordMatch) + reStr = `\\b${reStr}\\b`; + try { + return new RegExp(reStr, params.ignoreCase ? "i" : ""); + } catch { + return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + } +} +function refineGrepMatches(rows, params, forceMultiFilePrefix) { + const re9 = compileGrepRegex(params); + const multi = forceMultiFilePrefix ?? rows.length > 1; + const output = []; + for (const row of rows) { + if (!row.content) + continue; + const lines = row.content.split("\n"); + const matched = []; + for (let i11 = 0; i11 < lines.length; i11++) { + const hit = re9.test(lines[i11]); + if (hit !== !!params.invertMatch) { + if (params.filesOnly) { + output.push(row.path); + break; + } + const prefix = multi ? `${row.path}:` : ""; + const ln3 = params.lineNumber ? 
`${i11 + 1}:` : ""; + matched.push(`${prefix}${ln3}${lines[i11]}`); } - return this.generateVirtualIndex(); } - if (!this.files.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - const cached = this.files.get(p22); - if (cached !== null && cached !== void 0) - return cached.toString("utf-8"); - const pend = this.pending.get(p22); - if (pend) - return pend.contentText; - if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); - if (rows2.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text2 = rows2.map((r10) => typeof r10["message"] === "string" ? r10["message"] : JSON.stringify(r10["message"])).join("\n"); - const buf2 = Buffer.from(text2, "utf-8"); - this.files.set(p22, buf2); - return text2; + if (!params.filesOnly) { + if (params.countOnly) { + output.push(`${multi ? `${row.path}:` : ""}${matched.length}`); + } else { + output.push(...matched); + } } - const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); - if (rows.length === 0) - throw fsErr("ENOENT", "no such file or directory", p22); - const text = rows[0]["summary"] ?? ""; - const buf = Buffer.from(text, "utf-8"); - this.files.set(p22, buf); - return text; } - // ── IFileSystem: writes ─────────────────────────────────────────────────── - /** Write a file with optional row-level metadata (project, description, dates). */ - async writeFileWithMeta(path2, content, meta) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); - const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); - this.files.set(p22, buf); - this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); - this.addToTree(p22); - this.pending.set(p22, { - path: p22, - filename: basename4(p22), - contentText: text, - mimeType: mime, - sizeBytes: buf.length, - ...meta - }); - if (this.pending.size >= BATCH_SIZE) - await this.flush(); - else - this.scheduleFlush(); + return output; +} + +// dist/src/shell/deeplake-fs.js +var BATCH_SIZE = 10; +var PREFETCH_BATCH_SIZE = 50; +var FLUSH_DEBOUNCE_MS = 200; +function normPath(p22) { + const r10 = posix.normalize(p22.startsWith("/") ? p22 : "/" + p22); + return r10 === "/" ? r10 : r10.replace(/\/$/, ""); +} +function parentOf(p22) { + const i11 = p22.lastIndexOf("/"); + return i11 <= 0 ? "/" : p22.slice(0, i11); +} +function guessMime(filename) { + const ext2 = filename.split(".").pop()?.toLowerCase() ?? ""; + return { + json: "application/json", + md: "text/markdown", + txt: "text/plain", + js: "text/javascript", + ts: "text/typescript", + html: "text/html", + css: "text/css" + }[ext2] ?? "text/plain"; +} +function normalizeSessionMessage(path2, message) { + const raw = typeof message === "string" ? 
message : JSON.stringify(message); + return normalizeContent(path2, raw); +} +function joinSessionMessages(path2, messages) { + return messages.map((message) => normalizeSessionMessage(path2, message)).join("\n"); +} +function fsErr(code, msg, path2) { + return Object.assign(new Error(`${code}: ${msg}, '${path2}'`), { code }); +} +var DeeplakeFs = class _DeeplakeFs { + client; + table; + mountPoint; + // path → Buffer (content) or null (exists but not fetched yet) + files = /* @__PURE__ */ new Map(); + meta = /* @__PURE__ */ new Map(); + // dir path → Set of immediate child names + dirs = /* @__PURE__ */ new Map(); + // batched writes pending SQL flush + pending = /* @__PURE__ */ new Map(); + // paths that have been flushed (INSERT) at least once — subsequent flushes use UPDATE + flushed = /* @__PURE__ */ new Set(); + /** Number of files loaded from the server during bootstrap. */ + get fileCount() { + return this.files.size; } - async writeFile(path2, content, _opts) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.dirs.has(p22) && !this.files.has(p22)) - throw fsErr("EISDIR", "illegal operation on a directory", p22); - const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); - const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); - this.files.set(p22, buf); - this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); - this.addToTree(p22); - this.pending.set(p22, { - path: p22, - filename: basename4(p22), - contentText: text, - mimeType: mime, - sizeBytes: buf.length - }); - if (this.pending.size >= BATCH_SIZE) - await this.flush(); - else - this.scheduleFlush(); + flushTimer = null; + // serialize flushes + flushChain = Promise.resolve(); + // Paths that live in the sessions table (multi-row, read by concatenation) + sessionPaths = /* @__PURE__ */ new Set(); + sessionsTable = null; + constructor(client, table, mountPoint) { + this.client = client; + this.table = table; + this.mountPoint = mountPoint; + this.dirs.set(mountPoint, /* @__PURE__ */ new Set()); + if (mountPoint !== "/") + this.dirs.set("/", /* @__PURE__ */ new Set([mountPoint.slice(1)])); } - async appendFile(path2, content, opts) { - const p22 = normPath(path2); - const add = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (this.files.has(p22) || await this.exists(p22).catch(() => false)) { - const ts3 = (/* @__PURE__ */ new Date()).toISOString(); - await this.client.query(`UPDATE "${this.table}" SET summary = summary || E'${sqlStr(add)}', size_bytes = size_bytes + ${Buffer.byteLength(add, "utf-8")}, last_update_date = '${ts3}' WHERE path = '${sqlStr(p22)}'`); - this.files.set(p22, null); - const m26 = this.meta.get(p22); - if (m26) { - m26.size += Buffer.byteLength(add, "utf-8"); - m26.mtime = new Date(ts3); + static async create(client, table, mount = "/memory", sessionsTable) { + const fs3 = new _DeeplakeFs(client, table, mount); + fs3.sessionsTable = sessionsTable ?? 
null; + await client.ensureTable(); + let sessionSyncOk = true; + const memoryBootstrap = (async () => { + const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; + try { + const rows = await client.query(sql); + for (const row of rows) { + const p22 = row["path"]; + fs3.files.set(p22, null); + fs3.meta.set(p22, { + size: Number(row["size_bytes"] ?? 0), + mime: row["mime_type"] ?? "application/octet-stream", + mtime: /* @__PURE__ */ new Date() + }); + fs3.addToTree(p22); + fs3.flushed.add(p22); + } + } catch { } - } else { - await this.writeFile(p22, content, opts); - await this.flush(); - } + })(); + const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => { + try { + const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`); + for (const row of sessionRows) { + const p22 = row["path"]; + if (!fs3.files.has(p22)) { + fs3.files.set(p22, null); + fs3.meta.set(p22, { + size: Number(row["total_size"] ?? 0), + mime: "application/x-ndjson", + mtime: /* @__PURE__ */ new Date() + }); + fs3.addToTree(p22); + } + fs3.sessionPaths.add(p22); + } + } catch { + } + })() : Promise.resolve(); + await Promise.all([memoryBootstrap, sessionsBootstrap]); + return fs3; } - // ── IFileSystem: metadata ───────────────────────────────────────────────── - async exists(path2) { - const p22 = normPath(path2); - if (p22 === "/index.md") - return true; - return this.files.has(p22) || this.dirs.has(p22); + // ── tree management ─────────────────────────────────────────────────────── + addToTree(filePath) { + const segs = filePath.split("/").filter(Boolean); + for (let d15 = 0; d15 < segs.length; d15++) { + const dir = d15 === 0 ? 
"/" : "/" + segs.slice(0, d15).join("/"); + if (!this.dirs.has(dir)) + this.dirs.set(dir, /* @__PURE__ */ new Set()); + this.dirs.get(dir).add(segs[d15]); + } } - async stat(path2) { - const p22 = normPath(path2); - const isFile = this.files.has(p22); - const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { - return { - isFile: true, - isDirectory: false, - isSymbolicLink: false, - mode: 420, - size: 0, - mtime: /* @__PURE__ */ new Date() - }; - } - if (!isFile && !isDir) - throw fsErr("ENOENT", "no such file or directory", p22); - const m26 = this.meta.get(p22); - return { - isFile: isFile && !isDir, - isDirectory: isDir, - isSymbolicLink: false, - mode: isDir ? 493 : 420, - size: m26?.size ?? 0, - mtime: m26?.mtime ?? /* @__PURE__ */ new Date() - }; - } - async lstat(path2) { - return this.stat(path2); - } - async chmod(_path, _mode) { - } - async utimes(_path, _atime, _mtime) { - } - async symlink(_target, linkPath) { - throw fsErr("EPERM", "operation not permitted", linkPath); - } - async link(_src, destPath) { - throw fsErr("EPERM", "operation not permitted", destPath); + removeFromTree(filePath) { + this.files.delete(filePath); + this.meta.delete(filePath); + this.pending.delete(filePath); + this.flushed.delete(filePath); + const parent = parentOf(filePath); + this.dirs.get(parent)?.delete(basename4(filePath)); } - async readlink(path2) { - throw fsErr("EINVAL", "invalid argument", path2); + // ── flush / write batching ──────────────────────────────────────────────── + scheduleFlush() { + if (this.flushTimer !== null) + return; + this.flushTimer = setTimeout(() => { + this.flush().catch(() => { + }); + }, FLUSH_DEBOUNCE_MS); } - async realpath(path2) { - const p22 = normPath(path2); - if (p22 === "/index.md") - return p22; - if (!this.files.has(p22) && !this.dirs.has(p22)) - throw fsErr("ENOENT", "no such file or directory", p22); - return p22; + async flush() { + this.flushChain = this.flushChain.then(() => this._doFlush()); + 
return this.flushChain; } - // ── IFileSystem: directories ────────────────────────────────────────────── - async mkdir(path2, opts) { - const p22 = normPath(path2); - if (this.files.has(p22)) - throw fsErr("EEXIST", "file exists", p22); - if (this.dirs.has(p22)) { - if (!opts?.recursive) - throw fsErr("EEXIST", "file exists", p22); + async _doFlush() { + if (this.pending.size === 0) return; + if (this.flushTimer !== null) { + clearTimeout(this.flushTimer); + this.flushTimer = null; } - if (!opts?.recursive) { - const parent2 = parentOf(p22); - if (!this.dirs.has(parent2)) - throw fsErr("ENOENT", "no such file or directory", parent2); + const rows = [...this.pending.values()]; + this.pending.clear(); + const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10))); + let failures = 0; + for (let i11 = 0; i11 < results.length; i11++) { + if (results[i11].status === "rejected") { + if (!this.pending.has(rows[i11].path)) { + this.pending.set(rows[i11].path, rows[i11]); + } + failures++; + } } - this.dirs.set(p22, /* @__PURE__ */ new Set()); - const parent = parentOf(p22); - if (!this.dirs.has(parent)) - this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); - } - async readdir(path2) { - const p22 = normPath(path2); - if (!this.dirs.has(p22)) - throw fsErr("ENOTDIR", "not a directory", p22); - const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { - entries.push("index.md"); + if (failures > 0) { + throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); } - return entries; } - async readdirWithFileTypes(path2) { - const names = await this.readdir(path2); - const p22 = normPath(path2); - return names.map((name) => { - const child = p22 === "/" ? 
`/${name}` : `${p22}/${name}`; - return { - name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), - isDirectory: this.dirs.has(child), - isSymbolicLink: false - }; - }); + async upsertRow(r10) { + const text = sqlStr(r10.contentText); + const p22 = sqlStr(r10.path); + const fname = sqlStr(r10.filename); + const mime = sqlStr(r10.mimeType); + const ts3 = (/* @__PURE__ */ new Date()).toISOString(); + const cd = r10.creationDate ?? ts3; + const lud = r10.lastUpdateDate ?? ts3; + if (this.flushed.has(r10.path)) { + let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`; + if (r10.project !== void 0) + setClauses += `, project = '${sqlStr(r10.project)}'`; + if (r10.description !== void 0) + setClauses += `, description = '${sqlStr(r10.description)}'`; + await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`); + } else { + const id = randomUUID2(); + const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : ""); + const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? 
`, '${sqlStr(r10.description)}'` : ""); + await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`); + this.flushed.add(r10.path); + } } - // ── IFileSystem: structural mutations ───────────────────────────────────── - async rm(path2, opts) { - const p22 = normPath(path2); - if (this.sessionPaths.has(p22)) - throw fsErr("EPERM", "session files are read-only", p22); - if (!this.files.has(p22) && !this.dirs.has(p22)) { - if (opts?.force) - return; - throw fsErr("ENOENT", "no such file or directory", p22); + // ── Virtual index.md generation ──────────────────────────────────────────── + async generateVirtualIndex() { + const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); + const sessionPathsByKey = /* @__PURE__ */ new Map(); + for (const sp of this.sessionPaths) { + const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); + if (hivemind) { + sessionPathsByKey.set(hivemind[1], sp.slice(1)); + } else { + const fname = sp.split("/").pop() ?? ""; + const stem = fname.replace(/\.[^.]+$/, ""); + if (stem) + sessionPathsByKey.set(stem, sp.slice(1)); + } } - if (this.dirs.has(p22)) { - const children = this.dirs.get(p22) ?? /* @__PURE__ */ new Set(); - if (children.size > 0 && !opts?.recursive) - throw fsErr("ENOTEMPTY", "directory not empty", p22); - const toDelete = []; - const stack = [p22]; - while (stack.length) { - const cur = stack.pop(); - for (const child of [...this.dirs.get(cur) ?? []]) { - const childPath = cur === "/" ? 
`/${child}` : `${cur}/${child}`; - if (this.files.has(childPath)) - toDelete.push(childPath); - if (this.dirs.has(childPath)) - stack.push(childPath); - } + const lines = [ + "# Session Index", + "", + "List of all Claude Code sessions with summaries.", + "", + "| Session | Conversation | Created | Last Updated | Project | Description |", + "|---------|-------------|---------|--------------|---------|-------------|" + ]; + for (const row of rows) { + const p22 = row["path"]; + const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); + if (!match2) + continue; + const summaryUser = match2[1]; + const sessionId = match2[2]; + const relPath = `summaries/${summaryUser}/${sessionId}.md`; + const baseName = sessionId.replace(/_summary$/, ""); + const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); + const convLink = convPath ? `[messages](${convPath})` : ""; + const project = row["project"] || ""; + const description = row["description"] || ""; + const creationDate = row["creation_date"] || ""; + const lastUpdateDate = row["last_update_date"] || ""; + lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); + } + lines.push(""); + return lines.join("\n"); + } + // ── batch prefetch ──────────────────────────────────────────────────────── + /** + * Prefetch multiple files into the content cache with a single SQL query. + * Skips paths that are already cached, pending, or session-backed. + * After this call, subsequent readFile() calls for these paths hit cache. 
+ */ + async prefetch(paths) { + const uncached = []; + const uncachedSessions = []; + for (const raw of paths) { + const p22 = normPath(raw); + if (this.files.get(p22) !== null && this.files.get(p22) !== void 0) + continue; + if (this.pending.has(p22)) + continue; + if (!this.files.has(p22)) + continue; + if (this.sessionPaths.has(p22)) { + uncachedSessions.push(p22); + } else { + uncached.push(p22); } - const safeToDelete = toDelete.filter((fp) => !this.sessionPaths.has(fp)); - for (const fp of safeToDelete) - this.removeFromTree(fp); - this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); - if (safeToDelete.length > 0) { - const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); - await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); + } + for (let i11 = 0; i11 < uncached.length; i11 += PREFETCH_BATCH_SIZE) { + const chunk = uncached.slice(i11, i11 + PREFETCH_BATCH_SIZE); + const inList = chunk.map((p22) => `'${sqlStr(p22)}'`).join(", "); + const rows = await this.client.query(`SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})`); + for (const row of rows) { + const p22 = row["path"]; + const text = row["summary"] ?? 
""; + this.files.set(p22, Buffer.from(text, "utf-8")); } - } else { - await this.client.query(`DELETE FROM "${this.table}" WHERE path = '${sqlStr(p22)}'`); - this.removeFromTree(p22); } - } - async cp(src, dest, opts) { - const s10 = normPath(src), d15 = normPath(dest); - if (this.sessionPaths.has(d15)) - throw fsErr("EPERM", "session files are read-only", d15); - if (this.dirs.has(s10) && !this.files.has(s10)) { - if (!opts?.recursive) - throw fsErr("EISDIR", "is a directory", s10); - for (const fp of [...this.files.keys()].filter((k17) => k17 === s10 || k17.startsWith(s10 + "/"))) { - await this.writeFile(d15 + fp.slice(s10.length), await this.readFileBuffer(fp)); + if (!this.sessionsTable) + return; + for (let i11 = 0; i11 < uncachedSessions.length; i11 += PREFETCH_BATCH_SIZE) { + const chunk = uncachedSessions.slice(i11, i11 + PREFETCH_BATCH_SIZE); + const inList = chunk.map((p22) => `'${sqlStr(p22)}'`).join(", "); + const rows = await this.client.query(`SELECT path, message, creation_date FROM "${this.sessionsTable}" WHERE path IN (${inList}) ORDER BY path, creation_date ASC`); + const grouped = /* @__PURE__ */ new Map(); + for (const row of rows) { + const p22 = row["path"]; + const current = grouped.get(p22) ?? 
[]; + current.push(normalizeSessionMessage(p22, row["message"])); + grouped.set(p22, current); + } + for (const [p22, parts] of grouped) { + this.files.set(p22, Buffer.from(parts.join("\n"), "utf-8")); } - } else { - await this.writeFile(d15, await this.readFileBuffer(s10)); } } - async mv(src, dest) { - const s10 = normPath(src), d15 = normPath(dest); - if (this.sessionPaths.has(s10)) - throw fsErr("EPERM", "session files are read-only", s10); - if (this.sessionPaths.has(d15)) - throw fsErr("EPERM", "session files are read-only", d15); - await this.cp(src, dest, { recursive: true }); - await this.rm(src, { recursive: true, force: true }); + // ── IFileSystem: reads ──────────────────────────────────────────────────── + async readFileBuffer(path2) { + const p22 = normPath(path2); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + if (!this.files.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + const cached = this.files.get(p22); + if (cached !== null && cached !== void 0) + return cached; + const pend = this.pending.get(p22); + if (pend) { + const buf2 = Buffer.from(pend.contentText, "utf-8"); + this.files.set(p22, buf2); + return buf2; + } + if (this.sessionPaths.has(p22) && this.sessionsTable) { + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + if (rows2.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); + const buf2 = Buffer.from(text, "utf-8"); + this.files.set(p22, buf2); + return buf2; + } + const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); + if (rows.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const buf = Buffer.from(rows[0]["summary"] ?? 
"", "utf-8"); + this.files.set(p22, buf); + return buf; } - resolvePath(base, path2) { - if (path2.startsWith("/")) - return normPath(path2); - return normPath(posix.join(base, path2)); + async readFile(path2, _opts) { + const p22 = normPath(path2); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + if (p22 === "/index.md" && !this.files.has(p22)) { + const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); + if (realRows.length > 0 && realRows[0]["summary"]) { + const text2 = realRows[0]["summary"]; + const buf2 = Buffer.from(text2, "utf-8"); + this.files.set(p22, buf2); + return text2; + } + return this.generateVirtualIndex(); + } + if (!this.files.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + const cached = this.files.get(p22); + if (cached !== null && cached !== void 0) + return cached.toString("utf-8"); + const pend = this.pending.get(p22); + if (pend) + return pend.contentText; + if (this.sessionPaths.has(p22) && this.sessionsTable) { + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + if (rows2.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); + const buf2 = Buffer.from(text2, "utf-8"); + this.files.set(p22, buf2); + return text2; + } + const rows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr(p22)}' LIMIT 1`); + if (rows.length === 0) + throw fsErr("ENOENT", "no such file or directory", p22); + const text = rows[0]["summary"] ?? 
""; + const buf = Buffer.from(text, "utf-8"); + this.files.set(p22, buf); + return text; } - getAllPaths() { - return [.../* @__PURE__ */ new Set([...this.files.keys(), ...this.dirs.keys()])]; + // ── IFileSystem: writes ─────────────────────────────────────────────────── + /** Write a file with optional row-level metadata (project, description, dates). */ + async writeFileWithMeta(path2, content, meta) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); + const buf = Buffer.from(text, "utf-8"); + const mime = guessMime(basename4(p22)); + this.files.set(p22, buf); + this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); + this.addToTree(p22); + this.pending.set(p22, { + path: p22, + filename: basename4(p22), + contentText: text, + mimeType: mime, + sizeBytes: buf.length, + ...meta + }); + if (this.pending.size >= BATCH_SIZE) + await this.flush(); + else + this.scheduleFlush(); } -}; - -// node_modules/yargs-parser/build/lib/index.js -import { format } from "util"; -import { normalize, resolve as resolve4 } from "path"; - -// node_modules/yargs-parser/build/lib/string-utils.js -function camelCase(str) { - const isCamelCase = str !== str.toLowerCase() && str !== str.toUpperCase(); - if (!isCamelCase) { - str = str.toLowerCase(); + async writeFile(path2, content, _opts) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.dirs.has(p22) && !this.files.has(p22)) + throw fsErr("EISDIR", "illegal operation on a directory", p22); + const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); + const buf = Buffer.from(text, "utf-8"); + const mime = guessMime(basename4(p22)); + this.files.set(p22, buf); + this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); + this.addToTree(p22); + this.pending.set(p22, { + path: p22, + filename: basename4(p22), + contentText: text, + mimeType: mime, + sizeBytes: buf.length + }); + if (this.pending.size >= BATCH_SIZE) + await this.flush(); + else + this.scheduleFlush(); } - if (str.indexOf("-") === -1 && str.indexOf("_") === -1) { - return str; - } else { - let camelcase = ""; - let nextChrUpper = false; - const leadingHyphens = str.match(/^-+/); - for (let i11 = leadingHyphens ? leadingHyphens[0].length : 0; i11 < str.length; i11++) { - let chr = str.charAt(i11); - if (nextChrUpper) { - nextChrUpper = false; - chr = chr.toUpperCase(); - } - if (i11 !== 0 && (chr === "-" || chr === "_")) { - nextChrUpper = true; - } else if (chr !== "-" && chr !== "_") { - camelcase += chr; + async appendFile(path2, content, opts) { + const p22 = normPath(path2); + const add = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (this.files.has(p22) || await this.exists(p22).catch(() => false)) { + const ts3 = (/* @__PURE__ */ new Date()).toISOString(); + await this.client.query(`UPDATE "${this.table}" SET summary = summary || E'${sqlStr(add)}', size_bytes = size_bytes + ${Buffer.byteLength(add, "utf-8")}, last_update_date = '${ts3}' WHERE path = '${sqlStr(p22)}'`); + this.files.set(p22, null); + const m26 = this.meta.get(p22); + if (m26) { + m26.size += Buffer.byteLength(add, "utf-8"); + m26.mtime = new Date(ts3); } - } - return camelcase; - } -} -function decamelize(str, joinString) { - const lowercase = str.toLowerCase(); - joinString = joinString || "-"; - let notCamelcase = ""; - for (let i11 = 0; i11 < str.length; i11++) { - const chrLower = lowercase.charAt(i11); - const chrString = str.charAt(i11); - if (chrLower !== chrString && i11 > 0) { - notCamelcase += `${joinString}${lowercase.charAt(i11)}`; } else { - notCamelcase += chrString; + await this.writeFile(p22, content, opts); + await this.flush(); } } - return notCamelcase; -} -function looksLikeNumber(x28) { - if (x28 === null || x28 === void 0) - return false; - if (typeof x28 === "number") - return true; - if (/^0x[0-9a-f]+$/i.test(x28)) - return true; - if (/^0[^.]/.test(x28)) - return false; - return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x28); -} - -// node_modules/yargs-parser/build/lib/tokenize-arg-string.js -function tokenizeArgString(argString) { - if (Array.isArray(argString)) { - return argString.map((e6) => typeof e6 !== "string" ? 
e6 + "" : e6); + // ── IFileSystem: metadata ───────────────────────────────────────────────── + async exists(path2) { + const p22 = normPath(path2); + if (p22 === "/index.md") + return true; + return this.files.has(p22) || this.dirs.has(p22); } - argString = argString.trim(); - let i11 = 0; - let prevC = null; - let c15 = null; - let opening = null; - const args = []; - for (let ii2 = 0; ii2 < argString.length; ii2++) { - prevC = c15; - c15 = argString.charAt(ii2); - if (c15 === " " && !opening) { - if (!(prevC === " ")) { - i11++; - } - continue; - } - if (c15 === opening) { - opening = null; - } else if ((c15 === "'" || c15 === '"') && !opening) { - opening = c15; + async stat(path2) { + const p22 = normPath(path2); + const isFile = this.files.has(p22); + const isDir = this.dirs.has(p22); + if (p22 === "/index.md" && !isFile && !isDir) { + return { + isFile: true, + isDirectory: false, + isSymbolicLink: false, + mode: 420, + size: 0, + mtime: /* @__PURE__ */ new Date() + }; } - if (!args[i11]) - args[i11] = ""; - args[i11] += c15; + if (!isFile && !isDir) + throw fsErr("ENOENT", "no such file or directory", p22); + const m26 = this.meta.get(p22); + return { + isFile: isFile && !isDir, + isDirectory: isDir, + isSymbolicLink: false, + mode: isDir ? 493 : 420, + size: m26?.size ?? 0, + mtime: m26?.mtime ?? 
/* @__PURE__ */ new Date() + }; } - return args; -} - -// node_modules/yargs-parser/build/lib/yargs-parser-types.js -var DefaultValuesForTypeKey; -(function(DefaultValuesForTypeKey2) { - DefaultValuesForTypeKey2["BOOLEAN"] = "boolean"; + async lstat(path2) { + return this.stat(path2); + } + async chmod(_path, _mode) { + } + async utimes(_path, _atime, _mtime) { + } + async symlink(_target, linkPath) { + throw fsErr("EPERM", "operation not permitted", linkPath); + } + async link(_src, destPath) { + throw fsErr("EPERM", "operation not permitted", destPath); + } + async readlink(path2) { + throw fsErr("EINVAL", "invalid argument", path2); + } + async realpath(path2) { + const p22 = normPath(path2); + if (p22 === "/index.md") + return p22; + if (!this.files.has(p22) && !this.dirs.has(p22)) + throw fsErr("ENOENT", "no such file or directory", p22); + return p22; + } + // ── IFileSystem: directories ────────────────────────────────────────────── + async mkdir(path2, opts) { + const p22 = normPath(path2); + if (this.files.has(p22)) + throw fsErr("EEXIST", "file exists", p22); + if (this.dirs.has(p22)) { + if (!opts?.recursive) + throw fsErr("EEXIST", "file exists", p22); + return; + } + if (!opts?.recursive) { + const parent2 = parentOf(p22); + if (!this.dirs.has(parent2)) + throw fsErr("ENOENT", "no such file or directory", parent2); + } + this.dirs.set(p22, /* @__PURE__ */ new Set()); + const parent = parentOf(p22); + if (!this.dirs.has(parent)) + this.dirs.set(parent, /* @__PURE__ */ new Set()); + this.dirs.get(parent).add(basename4(p22)); + } + async readdir(path2) { + const p22 = normPath(path2); + if (!this.dirs.has(p22)) + throw fsErr("ENOTDIR", "not a directory", p22); + const entries = [...this.dirs.get(p22) ?? 
[]]; + if (p22 === "/" && !entries.includes("index.md")) { + entries.push("index.md"); + } + return entries; + } + async readdirWithFileTypes(path2) { + const names = await this.readdir(path2); + const p22 = normPath(path2); + return names.map((name) => { + const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; + return { + name, + isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isDirectory: this.dirs.has(child), + isSymbolicLink: false + }; + }); + } + // ── IFileSystem: structural mutations ───────────────────────────────────── + async rm(path2, opts) { + const p22 = normPath(path2); + if (this.sessionPaths.has(p22)) + throw fsErr("EPERM", "session files are read-only", p22); + if (!this.files.has(p22) && !this.dirs.has(p22)) { + if (opts?.force) + return; + throw fsErr("ENOENT", "no such file or directory", p22); + } + if (this.dirs.has(p22)) { + const children = this.dirs.get(p22) ?? /* @__PURE__ */ new Set(); + if (children.size > 0 && !opts?.recursive) + throw fsErr("ENOTEMPTY", "directory not empty", p22); + const toDelete = []; + const stack = [p22]; + while (stack.length) { + const cur = stack.pop(); + for (const child of [...this.dirs.get(cur) ?? []]) { + const childPath = cur === "/" ? 
`/${child}` : `${cur}/${child}`; + if (this.files.has(childPath)) + toDelete.push(childPath); + if (this.dirs.has(childPath)) + stack.push(childPath); + } + } + const safeToDelete = toDelete.filter((fp) => !this.sessionPaths.has(fp)); + for (const fp of safeToDelete) + this.removeFromTree(fp); + this.dirs.delete(p22); + this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + if (safeToDelete.length > 0) { + const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); + await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); + } + } else { + await this.client.query(`DELETE FROM "${this.table}" WHERE path = '${sqlStr(p22)}'`); + this.removeFromTree(p22); + } + } + async cp(src, dest, opts) { + const s10 = normPath(src), d15 = normPath(dest); + if (this.sessionPaths.has(d15)) + throw fsErr("EPERM", "session files are read-only", d15); + if (this.dirs.has(s10) && !this.files.has(s10)) { + if (!opts?.recursive) + throw fsErr("EISDIR", "is a directory", s10); + for (const fp of [...this.files.keys()].filter((k17) => k17 === s10 || k17.startsWith(s10 + "/"))) { + await this.writeFile(d15 + fp.slice(s10.length), await this.readFileBuffer(fp)); + } + } else { + await this.writeFile(d15, await this.readFileBuffer(s10)); + } + } + async mv(src, dest) { + const s10 = normPath(src), d15 = normPath(dest); + if (this.sessionPaths.has(s10)) + throw fsErr("EPERM", "session files are read-only", s10); + if (this.sessionPaths.has(d15)) + throw fsErr("EPERM", "session files are read-only", d15); + await this.cp(src, dest, { recursive: true }); + await this.rm(src, { recursive: true, force: true }); + } + resolvePath(base, path2) { + if (path2.startsWith("/")) + return normPath(path2); + return normPath(posix.join(base, path2)); + } + getAllPaths() { + return [.../* @__PURE__ */ new Set([...this.files.keys(), ...this.dirs.keys()])]; + } +}; + +// node_modules/yargs-parser/build/lib/index.js +import { format } from "util"; +import { normalize, resolve 
as resolve4 } from "path"; + +// node_modules/yargs-parser/build/lib/string-utils.js +function camelCase2(str) { + const isCamelCase = str !== str.toLowerCase() && str !== str.toUpperCase(); + if (!isCamelCase) { + str = str.toLowerCase(); + } + if (str.indexOf("-") === -1 && str.indexOf("_") === -1) { + return str; + } else { + let camelcase = ""; + let nextChrUpper = false; + const leadingHyphens = str.match(/^-+/); + for (let i11 = leadingHyphens ? leadingHyphens[0].length : 0; i11 < str.length; i11++) { + let chr = str.charAt(i11); + if (nextChrUpper) { + nextChrUpper = false; + chr = chr.toUpperCase(); + } + if (i11 !== 0 && (chr === "-" || chr === "_")) { + nextChrUpper = true; + } else if (chr !== "-" && chr !== "_") { + camelcase += chr; + } + } + return camelcase; + } +} +function decamelize(str, joinString) { + const lowercase = str.toLowerCase(); + joinString = joinString || "-"; + let notCamelcase = ""; + for (let i11 = 0; i11 < str.length; i11++) { + const chrLower = lowercase.charAt(i11); + const chrString = str.charAt(i11); + if (chrLower !== chrString && i11 > 0) { + notCamelcase += `${joinString}${lowercase.charAt(i11)}`; + } else { + notCamelcase += chrString; + } + } + return notCamelcase; +} +function looksLikeNumber(x28) { + if (x28 === null || x28 === void 0) + return false; + if (typeof x28 === "number") + return true; + if (/^0x[0-9a-f]+$/i.test(x28)) + return true; + if (/^0[^.]/.test(x28)) + return false; + return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x28); +} + +// node_modules/yargs-parser/build/lib/tokenize-arg-string.js +function tokenizeArgString(argString) { + if (Array.isArray(argString)) { + return argString.map((e6) => typeof e6 !== "string" ? 
e6 + "" : e6); + } + argString = argString.trim(); + let i11 = 0; + let prevC = null; + let c15 = null; + let opening = null; + const args = []; + for (let ii2 = 0; ii2 < argString.length; ii2++) { + prevC = c15; + c15 = argString.charAt(ii2); + if (c15 === " " && !opening) { + if (!(prevC === " ")) { + i11++; + } + continue; + } + if (c15 === opening) { + opening = null; + } else if ((c15 === "'" || c15 === '"') && !opening) { + opening = c15; + } + if (!args[i11]) + args[i11] = ""; + args[i11] += c15; + } + return args; +} + +// node_modules/yargs-parser/build/lib/yargs-parser-types.js +var DefaultValuesForTypeKey; +(function(DefaultValuesForTypeKey2) { + DefaultValuesForTypeKey2["BOOLEAN"] = "boolean"; DefaultValuesForTypeKey2["STRING"] = "string"; DefaultValuesForTypeKey2["NUMBER"] = "number"; DefaultValuesForTypeKey2["ARRAY"] = "array"; @@ -67983,7 +68490,7 @@ var YargsParser = class { ; [].concat(...Object.keys(aliases).map((k17) => aliases[k17])).forEach((alias) => { if (configuration["camel-case-expansion"] && alias.includes("-")) { - delete argv[alias.split(".").map((prop) => camelCase(prop)).join(".")]; + delete argv[alias.split(".").map((prop) => camelCase2(prop)).join(".")]; } delete argv[alias]; }); @@ -68065,7 +68572,7 @@ var YargsParser = class { function setArg(key, val, shouldStripQuotes = inputIsString) { if (/-/.test(key) && configuration["camel-case-expansion"]) { const alias = key.split(".").map(function(prop) { - return camelCase(prop); + return camelCase2(prop); }).join("."); addNewAlias(key, alias); } @@ -68213,7 +68720,7 @@ var YargsParser = class { if (i11 === 0) { key = key.substring(prefix.length); } - return camelCase(key); + return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { setArg(keys.join("."), env2[envVar]); @@ -68333,7 +68840,7 @@ var YargsParser = class { flags.aliases[key] = [].concat(aliases[key] || []); flags.aliases[key].concat(key).forEach(function(x28) { 
if (/-/.test(x28) && configuration["camel-case-expansion"]) { - const c15 = camelCase(x28); + const c15 = camelCase2(x28); if (c15 !== key && flags.aliases[key].indexOf(c15) === -1) { flags.aliases[key].push(c15); newAliases[c15] = true; @@ -68397,442 +68904,167 @@ var YargsParser = class { return hasAllFlags; } function isUnknownOptionAsArg(arg) { - return configuration["unknown-options-as-args"] && isUnknownOption(arg); - } - function isUnknownOption(arg) { - arg = arg.replace(/^-{3,}/, "--"); - if (arg.match(negative)) { - return false; - } - if (hasAllShortFlags(arg)) { - return false; - } - const flagWithEquals = /^-+([^=]+?)=[\s\S]*$/; - const normalFlag = /^-+([^=]+?)$/; - const flagEndingInHyphen = /^-+([^=]+?)-$/; - const flagEndingInDigits = /^-+([^=]+?\d+)$/; - const flagEndingInNonWordCharacters = /^-+([^=]+?)\W+.*$/; - return !hasFlagsMatching(arg, flagWithEquals, negatedBoolean, normalFlag, flagEndingInHyphen, flagEndingInDigits, flagEndingInNonWordCharacters); - } - function defaultValue(key) { - if (!checkAllAliases(key, flags.bools) && !checkAllAliases(key, flags.counts) && `${key}` in defaults2) { - return defaults2[key]; - } else { - return defaultForType(guessType(key)); - } - } - function defaultForType(type) { - const def = { - [DefaultValuesForTypeKey.BOOLEAN]: true, - [DefaultValuesForTypeKey.STRING]: "", - [DefaultValuesForTypeKey.NUMBER]: void 0, - [DefaultValuesForTypeKey.ARRAY]: [] - }; - return def[type]; - } - function guessType(key) { - let type = DefaultValuesForTypeKey.BOOLEAN; - if (checkAllAliases(key, flags.strings)) - type = DefaultValuesForTypeKey.STRING; - else if (checkAllAliases(key, flags.numbers)) - type = DefaultValuesForTypeKey.NUMBER; - else if (checkAllAliases(key, flags.bools)) - type = DefaultValuesForTypeKey.BOOLEAN; - else if (checkAllAliases(key, flags.arrays)) - type = DefaultValuesForTypeKey.ARRAY; - return type; - } - function isUndefined(num) { - return num === void 0; - } - function checkConfiguration() { - 
Object.keys(flags.counts).find((key) => { - if (checkAllAliases(key, flags.arrays)) { - error = Error(__("Invalid configuration: %s, opts.count excludes opts.array.", key)); - return true; - } else if (checkAllAliases(key, flags.nargs)) { - error = Error(__("Invalid configuration: %s, opts.count excludes opts.narg.", key)); - return true; - } - return false; - }); - } - return { - aliases: Object.assign({}, flags.aliases), - argv: Object.assign(argvReturn, argv), - configuration, - defaulted: Object.assign({}, defaulted), - error, - newAliases: Object.assign({}, newAliases) - }; - } -}; -function combineAliases(aliases) { - const aliasArrays = []; - const combined = /* @__PURE__ */ Object.create(null); - let change = true; - Object.keys(aliases).forEach(function(key) { - aliasArrays.push([].concat(aliases[key], key)); - }); - while (change) { - change = false; - for (let i11 = 0; i11 < aliasArrays.length; i11++) { - for (let ii2 = i11 + 1; ii2 < aliasArrays.length; ii2++) { - const intersect = aliasArrays[i11].filter(function(v27) { - return aliasArrays[ii2].indexOf(v27) !== -1; - }); - if (intersect.length) { - aliasArrays[i11] = aliasArrays[i11].concat(aliasArrays[ii2]); - aliasArrays.splice(ii2, 1); - change = true; - break; - } - } - } - } - aliasArrays.forEach(function(aliasArray) { - aliasArray = aliasArray.filter(function(v27, i11, self2) { - return self2.indexOf(v27) === i11; - }); - const lastAlias = aliasArray.pop(); - if (lastAlias !== void 0 && typeof lastAlias === "string") { - combined[lastAlias] = aliasArray; - } - }); - return combined; -} -function increment(orig) { - return orig !== void 0 ? orig + 1 : 1; -} -function sanitizeKey(key) { - if (key === "__proto__") - return "___proto___"; - return key; -} -function stripQuotes(val) { - return typeof val === "string" && (val[0] === "'" || val[0] === '"') && val[val.length - 1] === val[0] ? 
val.substring(1, val.length - 1) : val; -} - -// node_modules/yargs-parser/build/lib/index.js -import { readFileSync as readFileSync2 } from "fs"; -import { createRequire } from "node:module"; -var _a3; -var _b; -var _c; -var minNodeVersion = process && process.env && process.env.YARGS_MIN_NODE_VERSION ? Number(process.env.YARGS_MIN_NODE_VERSION) : 20; -var nodeVersion = (_b = (_a3 = process === null || process === void 0 ? void 0 : process.versions) === null || _a3 === void 0 ? void 0 : _a3.node) !== null && _b !== void 0 ? _b : (_c = process === null || process === void 0 ? void 0 : process.version) === null || _c === void 0 ? void 0 : _c.slice(1); -if (nodeVersion) { - const major = Number(nodeVersion.match(/^([^.]+)/)[1]); - if (major < minNodeVersion) { - throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); - } -} -var env = process ? process.env : {}; -var require2 = createRequire ? 
createRequire(import.meta.url) : void 0; -var parser = new YargsParser({ - cwd: process.cwd, - env: () => { - return env; - }, - format, - normalize, - resolve: resolve4, - require: (path2) => { - if (typeof require2 !== "undefined") { - return require2(path2); - } else if (path2.match(/\.json$/)) { - return JSON.parse(readFileSync2(path2, "utf8")); - } else { - throw Error("only .json config files are supported in ESM"); - } - } -}); -var yargsParser = function Parser(args, opts) { - const result = parser.parse(args.slice(), opts); - return result.argv; -}; -yargsParser.detailed = function(args, opts) { - return parser.parse(args.slice(), opts); -}; -yargsParser.camelCase = camelCase; -yargsParser.decamelize = decamelize; -yargsParser.looksLikeNumber = looksLikeNumber; -var lib_default = yargsParser; - -// dist/src/shell/grep-core.js -var TOOL_INPUT_FIELDS = [ - "command", - "file_path", - "path", - "pattern", - "prompt", - "subagent_type", - "query", - "url", - "notebook_path", - "old_string", - "new_string", - "content", - "skill", - "args", - "taskId", - "status", - "subject", - "description", - "to", - "message", - "summary", - "max_results" -]; -var TOOL_RESPONSE_DROP = /* @__PURE__ */ new Set([ - // Note: `stderr` is intentionally NOT in this set. The `stdout` high-signal - // branch below already de-dupes it for the common case (appends as suffix - // when non-empty). If a tool response has ONLY `stderr` and no `stdout` - // (hard-failure on some tools), the generic cleanup preserves it so the - // error message reaches Claude instead of collapsing to `[ok]`. 
- "interrupted", - "isImage", - "noOutputExpected", - "type", - "structuredPatch", - "userModified", - "originalFile", - "replaceAll", - "totalDurationMs", - "totalTokens", - "totalToolUseCount", - "usage", - "toolStats", - "durationMs", - "durationSeconds", - "bytes", - "code", - "codeText", - "agentId", - "agentType", - "verificationNudgeNeeded", - "numLines", - "numFiles", - "truncated", - "statusChange", - "updatedFields", - "isAgent", - "success" -]); -function maybeParseJson(v27) { - if (typeof v27 !== "string") - return v27; - const s10 = v27.trim(); - if (s10[0] !== "{" && s10[0] !== "[") - return v27; - try { - return JSON.parse(s10); - } catch { - return v27; - } -} -function snakeCase(k17) { - return k17.replace(/([A-Z])/g, "_$1").toLowerCase(); -} -function camelCase2(k17) { - return k17.replace(/_([a-z])/g, (_16, c15) => c15.toUpperCase()); -} -function formatToolInput(raw) { - const p22 = maybeParseJson(raw); - if (typeof p22 !== "object" || p22 === null) - return String(p22 ?? ""); - const parts = []; - for (const k17 of TOOL_INPUT_FIELDS) { - if (p22[k17] === void 0) - continue; - const v27 = p22[k17]; - parts.push(`${k17}: ${typeof v27 === "string" ? v27 : JSON.stringify(v27)}`); - } - for (const k17 of ["glob", "output_mode", "limit", "offset"]) { - if (p22[k17] !== void 0) - parts.push(`${k17}: ${p22[k17]}`); - } - return parts.length ? parts.join("\n") : JSON.stringify(p22); -} -function formatToolResponse(raw, inp, toolName) { - const r10 = maybeParseJson(raw); - if (typeof r10 !== "object" || r10 === null) - return String(r10 ?? ""); - if (toolName === "Edit" || toolName === "Write" || toolName === "MultiEdit") { - return r10.filePath ? `[wrote ${r10.filePath}]` : "[ok]"; - } - if (typeof r10.stdout === "string") { - const stderr = r10.stderr; - return r10.stdout + (stderr ? 
` -stderr: ${stderr}` : ""); - } - if (typeof r10.content === "string") - return r10.content; - if (r10.file && typeof r10.file === "object") { - const f11 = r10.file; - if (typeof f11.content === "string") - return `[${f11.filePath ?? ""}] -${f11.content}`; - if (typeof f11.base64 === "string") - return `[binary ${f11.filePath ?? ""}: ${f11.base64.length} base64 chars]`; - } - if (Array.isArray(r10.filenames)) - return r10.filenames.join("\n"); - if (Array.isArray(r10.matches)) { - return r10.matches.map((m26) => typeof m26 === "string" ? m26 : JSON.stringify(m26)).join("\n"); - } - if (Array.isArray(r10.results)) { - return r10.results.map((x28) => typeof x28 === "string" ? x28 : x28?.title ?? x28?.url ?? JSON.stringify(x28)).join("\n"); - } - const inpObj = maybeParseJson(inp); - const kept = {}; - for (const [k17, v27] of Object.entries(r10)) { - if (TOOL_RESPONSE_DROP.has(k17)) - continue; - if (v27 === "" || v27 === false || v27 == null) - continue; - if (typeof inpObj === "object" && inpObj) { - const inObj = inpObj; - if (k17 in inObj && JSON.stringify(inObj[k17]) === JSON.stringify(v27)) - continue; - const snake = snakeCase(k17); - if (snake in inObj && JSON.stringify(inObj[snake]) === JSON.stringify(v27)) - continue; - const camel = camelCase2(k17); - if (camel in inObj && JSON.stringify(inObj[camel]) === JSON.stringify(v27)) - continue; + return configuration["unknown-options-as-args"] && isUnknownOption(arg); } - kept[k17] = v27; - } - return Object.keys(kept).length ? JSON.stringify(kept) : "[ok]"; -} -function formatToolCall(obj) { - return `[tool:${obj?.tool_name ?? 
"?"}] -input: ${formatToolInput(obj?.tool_input)} -response: ${formatToolResponse(obj?.tool_response, obj?.tool_input, obj?.tool_name)}`; -} -function normalizeContent(path2, raw) { - if (!path2.includes("/sessions/")) - return raw; - if (!raw || raw[0] !== "{") - return raw; - let obj; - try { - obj = JSON.parse(raw); - } catch { - return raw; - } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); + function isUnknownOption(arg) { + arg = arg.replace(/^-{3,}/, "--"); + if (arg.match(negative)) { + return false; + } + if (hasAllShortFlags(arg)) { + return false; + } + const flagWithEquals = /^-+([^=]+?)=[\s\S]*$/; + const normalFlag = /^-+([^=]+?)$/; + const flagEndingInHyphen = /^-+([^=]+?)-$/; + const flagEndingInDigits = /^-+([^=]+?\d+)$/; + const flagEndingInNonWordCharacters = /^-+([^=]+?)\W+.*$/; + return !hasFlagsMatching(arg, flagWithEquals, negatedBoolean, normalFlag, flagEndingInHyphen, flagEndingInDigits, flagEndingInNonWordCharacters); } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? 
out2 : raw; + function defaultValue(key) { + if (!checkAllAliases(key, flags.bools) && !checkAllAliases(key, flags.counts) && `${key}` in defaults2) { + return defaults2[key]; + } else { + return defaultForType(guessType(key)); + } + } + function defaultForType(type) { + const def = { + [DefaultValuesForTypeKey.BOOLEAN]: true, + [DefaultValuesForTypeKey.STRING]: "", + [DefaultValuesForTypeKey.NUMBER]: void 0, + [DefaultValuesForTypeKey.ARRAY]: [] + }; + return def[type]; + } + function guessType(key) { + let type = DefaultValuesForTypeKey.BOOLEAN; + if (checkAllAliases(key, flags.strings)) + type = DefaultValuesForTypeKey.STRING; + else if (checkAllAliases(key, flags.numbers)) + type = DefaultValuesForTypeKey.NUMBER; + else if (checkAllAliases(key, flags.bools)) + type = DefaultValuesForTypeKey.BOOLEAN; + else if (checkAllAliases(key, flags.arrays)) + type = DefaultValuesForTypeKey.ARRAY; + return type; + } + function isUndefined(num) { + return num === void 0; + } + function checkConfiguration() { + Object.keys(flags.counts).find((key) => { + if (checkAllAliases(key, flags.arrays)) { + error = Error(__("Invalid configuration: %s, opts.count excludes opts.array.", key)); + return true; + } else if (checkAllAliases(key, flags.nargs)) { + error = Error(__("Invalid configuration: %s, opts.count excludes opts.narg.", key)); + return true; + } + return false; + }); + } + return { + aliases: Object.assign({}, flags.aliases), + argv: Object.assign(argvReturn, argv), + configuration, + defaulted: Object.assign({}, defaulted), + error, + newAliases: Object.assign({}, newAliases) + }; } - const stripRecalled = (t6) => { - const i11 = t6.indexOf(""); - if (i11 === -1) - return t6; - const j14 = t6.lastIndexOf(""); - if (j14 === -1 || j14 < i11) - return t6; - const head = t6.slice(0, i11); - const tail = t6.slice(j14 + "".length); - return (head + tail).replace(/^\s+/, "").replace(/\n{3,}/g, "\n\n"); - }; - let out = null; - if (obj.type === "user_message") { - out = `[user] 
${stripRecalled(String(obj.content ?? ""))}`; - } else if (obj.type === "assistant_message") { - const agent = obj.agent_type ? ` (agent=${obj.agent_type})` : ""; - out = `[assistant${agent}] ${stripRecalled(String(obj.content ?? ""))}`; - } else if (obj.type === "tool_call") { - out = formatToolCall(obj); +}; +function combineAliases(aliases) { + const aliasArrays = []; + const combined = /* @__PURE__ */ Object.create(null); + let change = true; + Object.keys(aliases).forEach(function(key) { + aliasArrays.push([].concat(aliases[key], key)); + }); + while (change) { + change = false; + for (let i11 = 0; i11 < aliasArrays.length; i11++) { + for (let ii2 = i11 + 1; ii2 < aliasArrays.length; ii2++) { + const intersect = aliasArrays[i11].filter(function(v27) { + return aliasArrays[ii2].indexOf(v27) !== -1; + }); + if (intersect.length) { + aliasArrays[i11] = aliasArrays[i11].concat(aliasArrays[ii2]); + aliasArrays.splice(ii2, 1); + change = true; + break; + } + } + } } - if (out === null) - return raw; - const trimmed = out.trim(); - if (!trimmed || trimmed === "[user]" || trimmed === "[assistant]" || /^\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}$/.test(trimmed)) - return raw; - return out; + aliasArrays.forEach(function(aliasArray) { + aliasArray = aliasArray.filter(function(v27, i11, self2) { + return self2.indexOf(v27) === i11; + }); + const lastAlias = aliasArray.pop(); + if (lastAlias !== void 0 && typeof lastAlias === "string") { + combined[lastAlias] = aliasArray; + } + }); + return combined; } -async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern } = opts; - const limit = opts.limit ?? 100; - const memFilter = contentScanOnly ? "" : ` AND summary::text ${likeOp} '%${escapedPattern}%'`; - const sessFilter = contentScanOnly ? 
"" : ` AND message::text ${likeOp} '%${escapedPattern}%'`; - const memQuery = `SELECT path, summary::text AS content FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const [memRows, sessRows] = await Promise.all([ - api.query(memQuery).catch(() => []), - api.query(sessQuery).catch(() => []) - ]); - const rows = []; - for (const r10 of memRows) - rows.push({ path: String(r10.path), content: String(r10.content ?? "") }); - for (const r10 of sessRows) - rows.push({ path: String(r10.path), content: String(r10.content ?? "") }); - return rows; +function increment(orig) { + return orig !== void 0 ? orig + 1 : 1; } -function buildPathFilter(targetPath) { - if (!targetPath || targetPath === "/") - return ""; - const clean = targetPath.replace(/\/+$/, ""); - return ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; +function sanitizeKey(key) { + if (key === "__proto__") + return "___proto___"; + return key; } -function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; - if (params.wordMatch) - reStr = `\\b${reStr}\\b`; - try { - return new RegExp(reStr, params.ignoreCase ? "i" : ""); - } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); +function stripQuotes(val) { + return typeof val === "string" && (val[0] === "'" || val[0] === '"') && val[val.length - 1] === val[0] ? val.substring(1, val.length - 1) : val; +} + +// node_modules/yargs-parser/build/lib/index.js +import { readFileSync as readFileSync3 } from "fs"; +import { createRequire } from "node:module"; +var _a3; +var _b; +var _c; +var minNodeVersion = process && process.env && process.env.YARGS_MIN_NODE_VERSION ? 
Number(process.env.YARGS_MIN_NODE_VERSION) : 20; +var nodeVersion = (_b = (_a3 = process === null || process === void 0 ? void 0 : process.versions) === null || _a3 === void 0 ? void 0 : _a3.node) !== null && _b !== void 0 ? _b : (_c = process === null || process === void 0 ? void 0 : process.version) === null || _c === void 0 ? void 0 : _c.slice(1); +if (nodeVersion) { + const major = Number(nodeVersion.match(/^([^.]+)/)[1]); + if (major < minNodeVersion) { + throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -function refineGrepMatches(rows, params, forceMultiFilePrefix) { - const re9 = compileGrepRegex(params); - const multi = forceMultiFilePrefix ?? rows.length > 1; - const output = []; - for (const row of rows) { - if (!row.content) - continue; - const lines = row.content.split("\n"); - const matched = []; - for (let i11 = 0; i11 < lines.length; i11++) { - const hit = re9.test(lines[i11]); - if (hit !== !!params.invertMatch) { - if (params.filesOnly) { - output.push(row.path); - break; - } - const prefix = multi ? `${row.path}:` : ""; - const ln3 = params.lineNumber ? `${i11 + 1}:` : ""; - matched.push(`${prefix}${ln3}${lines[i11]}`); - } - } - if (!params.filesOnly) { - if (params.countOnly) { - output.push(`${multi ? `${row.path}:` : ""}${matched.length}`); - } else { - output.push(...matched); - } +var env = process ? process.env : {}; +var require2 = createRequire ? 
createRequire(import.meta.url) : void 0; +var parser = new YargsParser({ + cwd: process.cwd, + env: () => { + return env; + }, + format, + normalize, + resolve: resolve4, + require: (path2) => { + if (typeof require2 !== "undefined") { + return require2(path2); + } else if (path2.match(/\.json$/)) { + return JSON.parse(readFileSync3(path2, "utf8")); + } else { + throw Error("only .json config files are supported in ESM"); } } - return output; -} +}); +var yargsParser = function Parser(args, opts) { + const result = parser.parse(args.slice(), opts); + return result.argv; +}; +yargsParser.detailed = function(args, opts) { + return parser.parse(args.slice(), opts); +}; +yargsParser.camelCase = camelCase2; +yargsParser.decamelize = decamelize; +yargsParser.looksLikeNumber = looksLikeNumber; +var lib_default = yargsParser; // dist/src/shell/grep-interceptor.js var MAX_FALLBACK_CANDIDATES = 500; @@ -68876,23 +69108,18 @@ function createGrepCommand(client, fs3, table, sessionsTable) { filesOnly: Boolean(parsed.l || parsed["files-with-matches"]), countOnly: Boolean(parsed.c || parsed["count"]) }; - const likeOp = matchParams.ignoreCase ? "ILIKE" : "LIKE"; - const hasRegexMeta = !matchParams.fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const escapedPattern = sqlLike(pattern); let rows = []; try { - const perTarget = await Promise.race([ - Promise.all(targets.map((t6) => searchDeeplakeTables(client, table, sessionsTable ?? "sessions", { - pathFilter: buildPathFilter(t6), - contentScanOnly: hasRegexMeta, - likeOp, - escapedPattern, - limit: 100 - }))), + const searchOptions = { + ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), + pathFilter: buildPathFilterForTargets(targets), + limit: 100 + }; + const queryRows = await Promise.race([ + searchDeeplakeTables(client, table, sessionsTable ?? 
"sessions", searchOptions), new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3)) ]); - for (const batch of perTarget) - rows.push(...batch); + rows.push(...queryRows); } catch { rows = []; } @@ -68920,6 +69147,13 @@ function createGrepCommand(client, fs3, table, sessionsTable) { // dist/src/shell/deeplake-shell.js async function main() { + const isOneShot = process.argv.includes("-c"); + if (isOneShot) { + delete process.env["HIVEMIND_TRACE_SQL"]; + delete process.env["DEEPLAKE_TRACE_SQL"]; + delete process.env["HIVEMIND_DEBUG"]; + delete process.env["DEEPLAKE_DEBUG"]; + } const config = loadConfig(); if (!config) { process.stderr.write("Deeplake credentials not found.\nSet HIVEMIND_TOKEN + HIVEMIND_ORG_ID in environment, or create ~/.deeplake/credentials.json\n"); @@ -68928,7 +69162,6 @@ async function main() { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; const mount = process.env["HIVEMIND_MOUNT"] ?? "/"; - const isOneShot = process.argv.includes("-c"); const client = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); if (!isOneShot) { process.stderr.write(`Connecting to deeplake://${config.workspaceId}/${table} ... 
diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index 2de7118..e6081b5 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -1,7 +1,7 @@ #!/usr/bin/env node // dist/src/hooks/codex/stop.js -import { readFileSync as readFileSync3, existsSync as existsSync3 } from "node:fs"; +import { readFileSync as readFileSync4, existsSync as existsSync4 } from "node:fs"; // dist/src/utils/stdin.js function readStdin() { @@ -58,6 +58,9 @@ function loadConfig() { // dist/src/deeplake-api.js import { randomUUID } from "node:crypto"; +import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { tmpdir } from "node:os"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -82,27 +85,48 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); -var TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -var DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql, maxLen = 220) { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } function traceSql(msg) { - if (!TRACE_SQL) + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg} `); - if (DEBUG_FILE_LOG) + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log2(msg); } var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; var MAX_CONCURRENCY = 5; +var QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? 
process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 1e4); +var INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 6e4); function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +function isTimeoutError(error) { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || name === "aborterror" || message.includes("timeout") || message.includes("timed out"); +} +function isDuplicateIndexError(error) { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || message.includes("pg_class_relname_nsp_index") || message.includes("already exists"); +} +function isSessionInsertQuery(sql) { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} +function isTransientHtml403(text) { + const body = text.toLowerCase(); + return body.includes(" Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -255,8 +287,61 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + buildLookupIndexName(table, suffix) { + return 
`idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + getLookupIndexMarkerPath(table, suffix) { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join3(getIndexMarkerDir(), `${markerKey}.json`); + } + hasFreshLookupIndexMarker(table, suffix) { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync2(markerPath)) + return false; + try { + const raw = JSON.parse(readFileSync2(markerPath, "utf-8")); + const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS) + return false; + return true; + } catch { + return false; + } + } + markLookupIndexReady(table, suffix) { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8"); + } + async ensureLookupIndex(table, suffix, columnsSql) { + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log2(`index "${indexName}" skipped: ${e.message}`); + } + } /** List all tables in the workspace (with retry). 
*/ - async listTables() { + async listTables(forceRefresh = false) { + if (!forceRefresh && this._tablesCache) + return [...this._tablesCache]; + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) + this._tablesCache = [...tables]; + return tables; + } + async _fetchTables() { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -267,22 +352,25 @@ var DeeplakeApi = class { }); if (resp.ok) { const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); + return { + tables: (data.tables ?? []).map((t) => t.table_name), + cacheable: true + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. 
*/ async ensureTable(name) { @@ -292,6 +380,8 @@ var DeeplakeApi = class { log2(`table "${tbl}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); + if (!tables.includes(tbl)) + this._tablesCache = [...tables, tbl]; } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ @@ -301,27 +391,30 @@ var DeeplakeApi = class { log2(`table "${name}" not found, creating`); await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${name}" created`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } }; // dist/src/hooks/codex/spawn-wiki-worker.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join4 } from "node:path"; -import { writeFileSync, mkdirSync as mkdirSync2 } from "node:fs"; -import { homedir as homedir3, tmpdir } from "node:os"; +import { dirname, join as join5 } from "node:path"; +import { 
writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs"; +import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; // dist/src/utils/wiki-log.js -import { mkdirSync, appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join3 } from "node:path"; +import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join4 } from "node:path"; function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { - const path = join3(hooksDir, filename); + const path = join4(hooksDir, filename); return { path, log(msg) { try { - mkdirSync(hooksDir, { recursive: true }); + mkdirSync2(hooksDir, { recursive: true }); appendFileSync2(path, `[${utcTimestamp()}] ${msg} `); } catch { @@ -332,7 +425,7 @@ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") { // dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir3(); -var wikiLogger = makeWikiLogger(join4(HOME, ".codex", "hooks")); +var wikiLogger = makeWikiLogger(join5(HOME, ".codex", "hooks")); var WIKI_LOG = wikiLogger.path; var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. 
@@ -394,10 +487,10 @@ function findCodexBin() { function spawnCodexWikiWorker(opts) { const { config, sessionId, cwd, bundleDir, reason } = opts; const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join4(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync2(tmpDir, { recursive: true }); - const configFile = join4(tmpDir, "config.json"); - writeFileSync(configFile, JSON.stringify({ + const tmpDir = join5(tmpdir2(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync3(tmpDir, { recursive: true }); + const configFile = join5(tmpDir, "config.json"); + writeFileSync2(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, @@ -410,11 +503,11 @@ function spawnCodexWikiWorker(opts) { tmpDir, codexBin: findCodexBin(), wikiLog: WIKI_LOG, - hooksDir: join4(HOME, ".codex", "hooks"), + hooksDir: join5(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); - const workerPath = join4(bundleDir, "wiki-worker.js"); + const workerPath = join5(bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -426,21 +519,21 @@ function bundleDirFromImportMeta(importMetaUrl) { } // dist/src/hooks/summary-state.js -import { readFileSync as readFileSync2, writeFileSync as writeFileSync2, writeSync, mkdirSync as mkdirSync3, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs"; +import { readFileSync as readFileSync3, writeFileSync as writeFileSync3, writeSync, mkdirSync as mkdirSync4, renameSync, existsSync as existsSync3, unlinkSync, openSync, closeSync } from "node:fs"; import { homedir as homedir4 } from "node:os"; -import { join as join5 } from "node:path"; +import { join as join6 } from "node:path"; var dlog = (msg) => log("summary-state", msg); -var STATE_DIR = join5(homedir4(), ".claude", "hooks", "summary-state"); +var 
STATE_DIR = join6(homedir4(), ".claude", "hooks", "summary-state"); var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4)); function lockPath(sessionId) { - return join5(STATE_DIR, `${sessionId}.lock`); + return join6(STATE_DIR, `${sessionId}.lock`); } function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) { - mkdirSync3(STATE_DIR, { recursive: true }); + mkdirSync4(STATE_DIR, { recursive: true }); const p = lockPath(sessionId); - if (existsSync2(p)) { + if (existsSync3(p)) { try { - const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10); + const ageMs = Date.now() - parseInt(readFileSync3(p, "utf-8"), 10); if (Number.isFinite(ageMs) && ageMs < maxAgeMs) return false; } catch (readErr) { @@ -507,8 +600,8 @@ async function main() { if (input.transcript_path) { try { const transcriptPath = input.transcript_path; - if (existsSync3(transcriptPath)) { - const transcript = readFileSync3(transcriptPath, "utf-8"); + if (existsSync4(transcriptPath)) { + const transcript = readFileSync4(transcriptPath, "utf-8"); const lines = transcript.trim().split("\n").reverse(); for (const line2 of lines) { try { diff --git a/codex/tests/codex-pre-tool-use-branches.test.ts b/codex/tests/codex-pre-tool-use-branches.test.ts new file mode 100644 index 0000000..e9d9772 --- /dev/null +++ b/codex/tests/codex-pre-tool-use-branches.test.ts @@ -0,0 +1,414 @@ +/** + * Branch-coverage suite for `src/hooks/codex/pre-tool-use.ts`. + * + * The codex hook mirrors the Claude Code pre-tool-use hook's routing + * logic but has its own decision shape (`action: "pass" | "guide" | + * "block"`) and a single Bash-command input (no separate Read tool). + * Before this suite the file sat at 0% coverage. This file drives the + * real `processCodexPreToolUse` entry point across every branch + * that the hook supports — not smoke tests, actual routing + content + * assertions per-branch. 
+ */ + +import { describe, expect, it, vi } from "vitest"; +import { + buildUnsupportedGuidance, + processCodexPreToolUse, + runVirtualShell, +} from "../../src/hooks/codex/pre-tool-use.js"; + +const BASE_CONFIG = { + token: "t", + apiUrl: "http://example", + orgId: "org", + orgName: "org", + userName: "u", + workspaceId: "default", +}; + +function makeApi(queryResponses: Record[] | ((sql: string) => Record[]) = []) { + return { + query: vi.fn(async (sql: string) => + typeof queryResponses === "function" ? queryResponses(sql) : queryResponses, + ), + } as any; +} + +/** Base deps every test wants: neutral cache (no hit) + log silent. */ +function baseDeps(extra: Record = {}) { + return { + config: BASE_CONFIG as any, + createApi: vi.fn(() => makeApi()), + readCachedIndexContentFn: vi.fn(() => null) as any, + writeCachedIndexContentFn: vi.fn() as any, + runVirtualShellFn: vi.fn(() => "") as any, + logFn: vi.fn(), + ...extra, + }; +} + +describe("codex: pure helpers", () => { + it("buildUnsupportedGuidance names the allowed bash builtins and rejects interpreters", () => { + const s = buildUnsupportedGuidance(); + expect(s).toMatch(/cat.*grep.*echo/); + expect(s).toMatch(/python|node|curl/); + }); + + it("runVirtualShell returns empty string and calls logFn when the spawn fails", () => { + const logFn = vi.fn(); + // /nope is not executable → execFileSync throws, caught by the wrapper. 
+ const out = runVirtualShell("cat /x", "/nope", logFn); + expect(out).toBe(""); + expect(logFn).toHaveBeenCalledWith(expect.stringContaining("virtual shell failed")); + }); +}); + +describe("processCodexPreToolUse: pass-through + unsafe", () => { + it("returns `pass` when the command doesn't mention the memory path", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "ls /tmp" } }, + baseDeps(), + ); + expect(d.action).toBe("pass"); + }); + + it("returns `guide` with the unsupported-command guidance when a memory-path command uses an interpreter", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "python ~/.deeplake/memory/x.py" } }, + baseDeps(), + ); + expect(d.action).toBe("guide"); + expect(d.output).toContain("not supported"); + expect(d.rewrittenCommand).toContain("python"); + }); + + it("falls back to runVirtualShell when no config is loaded", async () => { + const runVirtualShellFn = vi.fn(() => "FROM-SHELL") as any; + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md" } }, + { ...baseDeps({ runVirtualShellFn }), config: null as any }, + ); + expect(d.action).toBe("block"); + expect(d.output).toBe("FROM-SHELL"); + expect(runVirtualShellFn).toHaveBeenCalledTimes(1); + }); + + it("falls back to the virtual shell's empty-result placeholder when the shell returns empty", async () => { + const runVirtualShellFn = vi.fn(() => "") as any; + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/nonexistent.md" } }, + { + ...baseDeps({ runVirtualShellFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toContain("Command returned empty or the file does not exist"); 
+ }); +}); + +describe("processCodexPreToolUse: compiled bash fast-path", () => { + it("delegates to executeCompiledBashCommand and blocks with its output when a segment compiles", async () => { + const executeCompiledBashCommandFn = vi.fn(async () => "COMPILED OUTPUT") as any; + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md && ls ~/.deeplake/memory/summaries" } }, + { ...baseDeps(), executeCompiledBashCommandFn }, + ); + expect(d.action).toBe("block"); + expect(d.output).toBe("COMPILED OUTPUT"); + expect(executeCompiledBashCommandFn).toHaveBeenCalled(); + }); + + it("the compiled fallback callback cache-hits /index.md without re-querying the sessions table", async () => { + const readCachedIndexContentFn = vi.fn(() => "CACHED INDEX"); + const readVirtualPathContentsFn = vi.fn(async (_api, _m, _s, paths: string[]) => + new Map(paths.map((p) => [p, `FETCHED:${p}`])), + ) as any; + // Bash compiler asks for both /index.md and /sessions/x.json; only + // /sessions/x.json must reach the SQL layer. + const executeCompiledBashCommandFn = vi.fn(async (_api, _m, _s, _cmd, deps) => { + const fetched = await deps.readVirtualPathContentsFn(_api, _m, _s, ["/index.md", "/sessions/x.json"]); + return `idx=${fetched.get("/index.md")};x=${fetched.get("/sessions/x.json")}`; + }) as any; + + const d = await processCodexPreToolUse( + { session_id: "sess-A", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md && cat ~/.deeplake/memory/sessions/x.json" } }, + { + ...baseDeps({ readCachedIndexContentFn, readVirtualPathContentsFn }), + executeCompiledBashCommandFn, + }, + ); + expect(d.output).toContain("idx=CACHED INDEX"); + expect(d.output).toContain("x=FETCHED:/sessions/x.json"); + // Cache read was issued; the SQL read only fetched the non-cached path. 
+ expect(readCachedIndexContentFn).toHaveBeenCalledWith("sess-A"); + expect(readVirtualPathContentsFn).toHaveBeenCalledWith( + expect.anything(), expect.anything(), expect.anything(), + ["/sessions/x.json"], + ); + }); +}); + +describe("processCodexPreToolUse: direct read (cat/head/tail/wc)", () => { + it("cat returns raw content", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => "line1\nline2\nline3") as any, + }, + ); + expect(d.output).toBe("line1\nline2\nline3"); + }); + + it("head -N slices to the first N lines", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "head -2 ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => "l1\nl2\nl3\nl4") as any, + }, + ); + expect(d.output).toBe("l1\nl2"); + }); + + it("head (no -N) defaults to 10 lines", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "head ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => + Array.from({ length: 20 }, (_, i) => `L${i}`).join("\n"), + ) as any, + }, + ); + expect(d.output).toBe(Array.from({ length: 10 }, (_, i) => `L${i}`).join("\n")); + }); + + it("tail -N slices to the last N lines", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "tail -2 ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: 
vi.fn(async () => "l1\nl2\nl3\nl4") as any, + }, + ); + expect(d.output).toBe("l3\nl4"); + }); + + it("tail defaults to the last 10 lines", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "tail ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => + Array.from({ length: 20 }, (_, i) => `L${i}`).join("\n"), + ) as any, + }, + ); + expect(d.output).toBe(Array.from({ length: 10 }, (_, i) => `L${i + 10}`).join("\n")); + }); + + it("wc -l returns ` `", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "wc -l ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => "a\nb\nc") as any, + }, + ); + expect(d.output).toBe("3 /sessions/a.json"); + }); + + it("cat | head pipeline collapses to a single head read", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json | head -3" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => + Array.from({ length: 30 }, (_, i) => `L${i}`).join("\n"), + ) as any, + }, + ); + expect(d.output).toBe("L0\nL1\nL2"); + }); +}); + +describe("processCodexPreToolUse: /index.md caching + fallback", () => { + it("serves /index.md from the session cache when present — no virtual-path fetch", async () => { + const readCachedIndexContentFn = vi.fn(() => "CACHED-BODY"); + const readVirtualPathContentFn = vi.fn(async () => "FRESH") as any; + const d = await processCodexPreToolUse( + { session_id: "s-cache", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md" } }, + { + 
...baseDeps({ readCachedIndexContentFn, readVirtualPathContentFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toBe("CACHED-BODY"); + expect(readVirtualPathContentFn).not.toHaveBeenCalled(); + }); + + it("on cache miss fetches /index.md via readVirtualPathContent + writes it into the cache", async () => { + const writeCachedIndexContentFn = vi.fn(); + const d = await processCodexPreToolUse( + { session_id: "s-miss", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md" } }, + { + ...baseDeps({ writeCachedIndexContentFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => "FRESH INDEX") as any, + }, + ); + expect(d.output).toBe("FRESH INDEX"); + expect(writeCachedIndexContentFn).toHaveBeenCalledWith("s-miss", "FRESH INDEX"); + }); + + it("falls back to the inline memory-table SELECT when readVirtualPathContent returns null for /index.md", async () => { + // Simulates a table where memory has rows but the path isn't in the + // exact-path union. Codex's fallback builder queries /summaries/%. 
+ const api = makeApi([ + { path: "/summaries/a/s1.md", project: "proj", description: "desc", creation_date: "2026-04-20" }, + { path: "/summaries/a/s2.md", project: "", description: "", creation_date: "2026-04-19" }, + ]); + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/index.md" } }, + { + ...baseDeps({ createApi: vi.fn(() => api) }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toContain("# Memory Index"); + expect(d.output).toContain("2 sessions:"); + expect(d.output).toContain("/summaries/a/s1.md"); + expect(d.output).toContain("[proj]"); + }); +}); + +describe("processCodexPreToolUse: ls branch", () => { + it("short-format listing renders file vs dir entries + empty-name rows are skipped", async () => { + const listVirtualPathRowsFn = vi.fn(async () => [ + { path: "/summaries/top.md", size_bytes: 10 }, // file directly under /summaries + { path: "/summaries/alice/s1.md", size_bytes: 42 }, // nested → alice becomes a dir + { path: "/summaries/", size_bytes: 0 }, // trailing slash — skipped + ]) as any; + + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "ls ~/.deeplake/memory/summaries" } }, + { + ...baseDeps({ listVirtualPathRowsFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toContain("top.md"); + expect(d.output).toContain("alice/"); + expect(d.output!.split("\n").filter(l => l).length).toBe(2); + }); + + it("long-format listing includes permission strings and sizes", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "ls -la ~/.deeplake/memory/summaries" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + listVirtualPathRowsFn: vi.fn(async () => [ + { path: 
"/summaries/top.md", size_bytes: 42 }, + { path: "/summaries/alice/s1.md", size_bytes: 100 }, + ]) as any, + }, + ); + expect(d.output).toContain("-rw-r--r--"); + expect(d.output).toContain("top.md"); + expect(d.output).toContain("drwxr-xr-x"); + expect(d.output).toContain("alice/"); + }); + + it("ls on an empty or non-existent directory returns a 'cannot access' message", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "ls ~/.deeplake/memory/nope" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + listVirtualPathRowsFn: vi.fn(async () => []) as any, + }, + ); + expect(d.output).toContain("cannot access"); + expect(d.output).toContain("No such file or directory"); + }); +}); + +describe("processCodexPreToolUse: find + grep + fallback", () => { + it("find -name '' returns matching paths joined with newlines", async () => { + const findVirtualPathsFn = vi.fn(async () => [ + "/sessions/conv_0_session_1.json", + "/sessions/conv_0_session_2.json", + ]) as any; + + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "find ~/.deeplake/memory/sessions -name '*.json'" } }, + { + ...baseDeps({ findVirtualPathsFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toBe("/sessions/conv_0_session_1.json\n/sessions/conv_0_session_2.json"); + }); + + it("find … | wc -l collapses to the count", async () => { + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "find ~/.deeplake/memory/sessions -name '*.json' | wc -l" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + findVirtualPathsFn: vi.fn(async () => ["/a", "/b", "/c"]) as any, + }, + ); + expect(d.output).toBe("3"); + }); + + it("find with zero matches returns '(no matches)'", async () => { + const d = await 
processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "find ~/.deeplake/memory/sessions -name '*.xyz'" } }, + { + ...baseDeps(), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + findVirtualPathsFn: vi.fn(async () => []) as any, + }, + ); + expect(d.output).toBe("(no matches)"); + }); + + it("grep via parseBashGrep delegates to handleGrepDirect", async () => { + const handleGrepDirectFn = vi.fn(async () => "/sessions/a.json:matching line") as any; + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "grep -l foo ~/.deeplake/memory/sessions/*.json" } }, + { + ...baseDeps({ handleGrepDirectFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + }, + ); + expect(d.output).toBe("/sessions/a.json:matching line"); + expect(handleGrepDirectFn).toHaveBeenCalled(); + }); + + it("falls back to runVirtualShell when the direct-query path throws mid-flow", async () => { + const runVirtualShellFn = vi.fn(() => "SHELL OK") as any; + const d = await processCodexPreToolUse( + { session_id: "s", tool_name: "shell", tool_input: { command: "cat ~/.deeplake/memory/sessions/a.json" } }, + { + ...baseDeps({ runVirtualShellFn }), + executeCompiledBashCommandFn: vi.fn(async () => null) as any, + readVirtualPathContentFn: vi.fn(async () => { throw new Error("network bonk"); }) as any, + }, + ); + expect(d.output).toBe("SHELL OK"); + expect(runVirtualShellFn).toHaveBeenCalled(); + }); +}); diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index ad3cb4d..a003b04 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -1,20 +1,30 @@ import { randomUUID } from "node:crypto"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { log as _log } from "./utils/debug.js"; import { sqlStr } from "./utils/sql.js"; const log = (msg: string) => _log("sdk", msg); -const 
TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -const DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; function summarizeSql(sql: string, maxLen = 220): string { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } +/** + * SQL tracing is opt-in and evaluated on every call so callers can flip the + * env vars after module load (e.g. the one-shot shell bundle silences + * `[deeplake-sql]` stderr writes so they don't land in Claude Code's + * Bash-tool result — Claude Code merges child stderr into tool_result). + */ function traceSql(msg: string): void { - if (!TRACE_SQL) return; + const traceEnabled = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" + || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (!traceEnabled) return; process.stderr.write(`[deeplake-sql] ${msg}\n`); - if (DEBUG_FILE_LOG) log(msg); + const debugFileLog = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; + if (debugFileLog) log(msg); } // ── Retry & concurrency primitives ────────────────────────────────────────── @@ -23,11 +33,45 @@ const RETRYABLE_CODES = new Set([429, 500, 502, 503, 504]); const MAX_RETRIES = 3; const BASE_DELAY_MS = 500; const MAX_CONCURRENCY = 5; +const QUERY_TIMEOUT_MS = Number(process.env["HIVEMIND_QUERY_TIMEOUT_MS"] ?? process.env["DEEPLAKE_QUERY_TIMEOUT_MS"] ?? 10_000); +const INDEX_MARKER_TTL_MS = Number(process.env["HIVEMIND_INDEX_MARKER_TTL_MS"] ?? 6 * 60 * 60_000); function sleep(ms: number): Promise { return new Promise(resolve => setTimeout(resolve, ms)); } +function isTimeoutError(error: unknown): boolean { + const name = error instanceof Error ? error.name.toLowerCase() : ""; + const message = error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + return name.includes("timeout") || + name === "aborterror" || + message.includes("timeout") || + message.includes("timed out"); +} + +function isDuplicateIndexError(error: unknown): boolean { + const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase(); + return message.includes("duplicate key value violates unique constraint") || + message.includes("pg_class_relname_nsp_index") || + message.includes("already exists"); +} + +function isSessionInsertQuery(sql: string): boolean { + return /^\s*insert\s+into\s+"[^"]+"\s*\(\s*id\s*,\s*path\s*,\s*filename\s*,\s*message\s*,/i.test(sql); +} + +function isTransientHtml403(text: string): boolean { + const body = text.toLowerCase(); + return body.includes(" void)[] = []; private active = 0; @@ -62,6 +106,7 @@ export interface WriteRow { export class DeeplakeApi { private _pendingRows: WriteRow[] = []; private _sem = new Semaphore(MAX_CONCURRENCY); + private _tablesCache: string[] | null = null; constructor( private token: string, @@ -95,6 +140,7 @@ export class DeeplakeApi { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { let resp: Response; try { + const signal = AbortSignal.timeout(QUERY_TIMEOUT_MS); resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { method: "POST", headers: { @@ -102,10 +148,15 @@ export class DeeplakeApi { "Content-Type": "application/json", "X-Activeloop-Org-Id": this.orgId, }, + signal, body: JSON.stringify({ query: sql }), }); } catch (e: unknown) { // Network-level failure (DNS, TCP reset, timeout, etc.) + if (isTimeoutError(e)) { + lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + throw lastError; + } lastError = e instanceof Error ? 
e : new Error(String(e)); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; @@ -123,7 +174,10 @@ export class DeeplakeApi { ); } const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const retryable403 = + isSessionInsertQuery(sql) && + (resp.status === 401 || (resp.status === 403 && (text.length === 0 || isTransientHtml403(text)))); + if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); await sleep(delay); @@ -199,8 +253,67 @@ export class DeeplakeApi { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + private buildLookupIndexName(table: string, suffix: string): string { + return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); + } + + private getLookupIndexMarkerPath(table: string, suffix: string): string { + const markerKey = [ + this.workspaceId, + this.orgId, + table, + suffix, + ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_"); + return join(getIndexMarkerDir(), `${markerKey}.json`); + } + + private hasFreshLookupIndexMarker(table: string, suffix: string): boolean { + const markerPath = this.getLookupIndexMarkerPath(table, suffix); + if (!existsSync(markerPath)) return false; + try { + const raw = JSON.parse(readFileSync(markerPath, "utf-8")) as { updatedAt?: string }; + const updatedAt = raw.updatedAt ? 
new Date(raw.updatedAt).getTime() : NaN; + if (!Number.isFinite(updatedAt) || (Date.now() - updatedAt) > INDEX_MARKER_TTL_MS) return false; + return true; + } catch { + return false; + } + } + + private markLookupIndexReady(table: string, suffix: string): void { + mkdirSync(getIndexMarkerDir(), { recursive: true }); + writeFileSync( + this.getLookupIndexMarkerPath(table, suffix), + JSON.stringify({ updatedAt: new Date().toISOString() }), + "utf-8", + ); + } + + private async ensureLookupIndex(table: string, suffix: string, columnsSql: string): Promise { + if (this.hasFreshLookupIndexMarker(table, suffix)) return; + const indexName = this.buildLookupIndexName(table, suffix); + try { + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`); + this.markLookupIndexReady(table, suffix); + } catch (e: any) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + log(`index "${indexName}" skipped: ${e.message}`); + } + } + /** List all tables in the workspace (with retry). */ - async listTables(): Promise { + async listTables(forceRefresh = false): Promise { + if (!forceRefresh && this._tablesCache) return [...this._tablesCache]; + + const { tables, cacheable } = await this._fetchTables(); + if (cacheable) this._tablesCache = [...tables]; + return tables; + } + + private async _fetchTables(): Promise<{ tables: string[]; cacheable: boolean }> { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { @@ -211,22 +324,25 @@ export class DeeplakeApi { }); if (resp.ok) { const data = await resp.json() as { tables?: { table_name: string }[] }; - return (data.tables ?? []).map(t => t.table_name); + return { + tables: (data.tables ?? 
[]).map(t => t.table_name), + cacheable: true, + }; } if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); continue; } - return []; + return { tables: [], cacheable: false }; } catch { if (attempt < MAX_RETRIES) { await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); continue; } - return []; + return { tables: [], cacheable: false }; } } - return []; + return { tables: [], cacheable: false }; } /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ @@ -252,6 +368,7 @@ export class DeeplakeApi { `) USING deeplake`, ); log(`table "${tbl}" created`); + if (!tables.includes(tbl)) this._tablesCache = [...tables, tbl]; } // BM25 index disabled — CREATE INDEX causes intermittent oid errors on fresh tables. // See bm25-oid-bug.sh for reproduction. Re-enable once Deeplake fixes the oid invalidation. @@ -284,6 +401,8 @@ export class DeeplakeApi { `) USING deeplake`, ); log(`table "${name}" created`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; } + await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); } } diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts new file mode 100644 index 0000000..68e1534 --- /dev/null +++ b/src/hooks/bash-command-compiler.ts @@ -0,0 +1,525 @@ +import type { DeeplakeApi } from "../deeplake-api.js"; +import { sqlLike } from "../utils/sql.js"; +import { type GrepParams, handleGrepDirect, parseBashGrep } from "./grep-direct.js"; +import { normalizeContent, refineGrepMatches } from "../shell/grep-core.js"; +import { capOutputForClaude } from "../utils/output-cap.js"; +import { + listVirtualPathRowsForDirs, + readVirtualPathContents, + findVirtualPaths, +} from "./virtual-table-query.js"; + +type VirtualRow = Record; + +export type CompiledSegment = + | { kind: "echo"; text: string } + | { kind: "cat"; paths: string[]; lineLimit: number; 
fromEnd: boolean; countLines: boolean; ignoreMissing: boolean } + | { kind: "ls"; dirs: string[]; longFormat: boolean } + | { kind: "find"; dir: string; pattern: string; countOnly: boolean } + | { kind: "find_grep"; dir: string; patterns: string[]; params: GrepParams; lineLimit: number } + | { kind: "grep"; params: GrepParams; lineLimit: number }; + +interface ParsedModifier { + clean: string; + ignoreMissing: boolean; +} + +function isQuoted(ch: string): boolean { + return ch === "'" || ch === "\""; +} + +export function splitTopLevel(input: string, operators: string[]): string[] | null { + const parts: string[] = []; + let current = ""; + let quote: string | null = null; + + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) quote = null; + current += ch; + continue; + } + if (isQuoted(ch)) { + quote = ch; + current += ch; + continue; + } + + const matched = operators.find((op) => input.startsWith(op, i)); + if (matched) { + const trimmed = current.trim(); + if (trimmed) parts.push(trimmed); + current = ""; + i += matched.length - 1; + continue; + } + + current += ch; + } + + if (quote) return null; + const trimmed = current.trim(); + if (trimmed) parts.push(trimmed); + return parts; +} + +export function tokenizeShellWords(input: string): string[] | null { + const tokens: string[] = []; + let current = ""; + let quote: string | null = null; + + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === "\"" && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } + continue; + } + + if (isQuoted(ch)) { + quote = ch; + continue; + } + + if (/\s/.test(ch)) { + if (current) { + tokens.push(current); + current = ""; + } + continue; + } + + current += ch; + } + + if (quote) return null; + if (current) tokens.push(current); + return tokens; +} + +export function expandBraceToken(token: string): 
string[] { + const match = token.match(/\{([^{}]+)\}/); + if (!match) return [token]; + + const [expr] = match; + const prefix = token.slice(0, match.index); + const suffix = token.slice((match.index ?? 0) + expr.length); + + let variants: string[] = []; + const numericRange = match[1].match(/^(-?\d+)\.\.(-?\d+)$/); + if (numericRange) { + const start = Number(numericRange[1]); + const end = Number(numericRange[2]); + const step = start <= end ? 1 : -1; + for (let value = start; step > 0 ? value <= end : value >= end; value += step) { + variants.push(String(value)); + } + } else { + variants = match[1].split(","); + } + + return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); +} + +export function stripAllowedModifiers(segment: string): ParsedModifier { + const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); + const clean = segment + .replace(/\s2>\/dev\/null\s*$/g, "") + .replace(/\s2>&1\s*/g, " ") + .trim(); + return { clean, ignoreMissing }; +} + +export function hasUnsupportedRedirection(segment: string): boolean { + let quote: string | null = null; + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]; + if (quote) { + if (ch === quote) quote = null; + continue; + } + if (isQuoted(ch)) { + quote = ch; + continue; + } + if (ch === ">" || ch === "<") return true; + } + return false; +} + +function parseHeadTailStage(stage: string): { lineLimit: number; fromEnd: boolean } | null { + const tokens = tokenizeShellWords(stage); + if (!tokens || tokens.length === 0) return null; + const [cmd, ...rest] = tokens; + if (cmd !== "head" && cmd !== "tail") return null; + if (rest.length === 0) return { lineLimit: 10, fromEnd: cmd === "tail" }; + if (rest.length === 1) { + const count = Number(rest[0]); + if (!Number.isFinite(count)) { + return { lineLimit: 10, fromEnd: cmd === "tail" }; + } + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && /^-\d+$/.test(rest[0])) { + const 
count = Number(rest[0]); + if (!Number.isFinite(count)) return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 2 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + if (rest.length === 3 && rest[0] === "-n") { + const count = Number(rest[1]); + if (!Number.isFinite(count)) return null; + return { lineLimit: Math.abs(count), fromEnd: cmd === "tail" }; + } + return null; +} + +function isValidPipelineHeadTailStage(stage: string): boolean { + const tokens = tokenizeShellWords(stage); + if (!tokens || (tokens[0] !== "head" && tokens[0] !== "tail")) return false; + if (tokens.length === 1) return true; + if (tokens.length === 2) return /^-\d+$/.test(tokens[1]); + if (tokens.length === 3) return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); + return false; +} + +function parseFindNamePatterns(tokens: string[]): string[] | null { + const patterns: string[] = []; + for (let i = 2; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-type") { + i += 1; + continue; + } + if (token === "-o") continue; + if (token === "-name") { + const pattern = tokens[i + 1]; + if (!pattern) return null; + patterns.push(pattern); + i += 1; + continue; + } + return null; + } + return patterns.length > 0 ? 
patterns : null; +} + +export function parseCompiledSegment(segment: string): CompiledSegment | null { + const { clean, ignoreMissing } = stripAllowedModifiers(segment); + if (hasUnsupportedRedirection(clean)) return null; + const pipeline = splitTopLevel(clean, ["|"]); + if (!pipeline || pipeline.length === 0) return null; + + const tokens = tokenizeShellWords(pipeline[0]); + if (!tokens || tokens.length === 0) return null; + + if (tokens[0] === "echo" && pipeline.length === 1) { + const text = tokens.slice(1).join(" "); + return { kind: "echo", text }; + } + + if (tokens[0] === "cat") { + const paths = tokens.slice(1).flatMap(expandBraceToken); + if (paths.length === 0) return null; + let lineLimit = 0; + let fromEnd = false; + let countLines = false; + if (pipeline.length > 1) { + if (pipeline.length !== 2) return null; + const pipeStage = pipeline[1].trim(); + if (/^wc\s+-l\s*$/.test(pipeStage)) { + if (paths.length !== 1) return null; + countLines = true; + } else { + if (!isValidPipelineHeadTailStage(pipeStage)) return null; + const headTail = parseHeadTailStage(pipeStage); + if (!headTail) return null; + lineLimit = headTail.lineLimit; + fromEnd = headTail.fromEnd; + } + } + return { kind: "cat", paths, lineLimit, fromEnd, countLines, ignoreMissing }; + } + + if (tokens[0] === "head" || tokens[0] === "tail") { + if (pipeline.length !== 1) return null; + const parsed = parseHeadTailStage(clean); + if (!parsed) return null; + const headTokens = tokenizeShellWords(clean); + if (!headTokens) return null; + if ( + (headTokens[1] === "-n" && headTokens.length < 4) || + (/^-\d+$/.test(headTokens[1] ?? "") && headTokens.length < 3) || + (headTokens.length === 2 && /^-?\d+$/.test(headTokens[1] ?? 
"")) + ) return null; + const path = headTokens[headTokens.length - 1]; + if (path === "head" || path === "tail" || path === "-n") return null; + return { + kind: "cat", + paths: expandBraceToken(path), + lineLimit: parsed.lineLimit, + fromEnd: parsed.fromEnd, + countLines: false, + ignoreMissing, + }; + } + + if (tokens[0] === "wc" && tokens[1] === "-l" && pipeline.length === 1 && tokens[2]) { + return { + kind: "cat", + paths: expandBraceToken(tokens[2]), + lineLimit: 0, + fromEnd: false, + countLines: true, + ignoreMissing, + }; + } + + if (tokens[0] === "ls" && pipeline.length === 1) { + const dirs = tokens + .slice(1) + .filter(token => !token.startsWith("-")) + .flatMap(expandBraceToken); + const longFormat = tokens.some(token => token.startsWith("-") && token.includes("l")); + return { kind: "ls", dirs: dirs.length > 0 ? dirs : ["/"], longFormat }; + } + + if (tokens[0] === "find") { + if (pipeline.length > 3) return null; + const dir = tokens[1]; + if (!dir) return null; + const patterns = parseFindNamePatterns(tokens); + if (!patterns) return null; + const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); + if (countOnly) { + if (patterns.length !== 1) return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + + if (pipeline.length >= 2) { + const xargsTokens = tokenizeShellWords(pipeline[1].trim()); + if (!xargsTokens || xargsTokens[0] !== "xargs") return null; + const xargsArgs = xargsTokens.slice(1); + while (xargsArgs[0] && xargsArgs[0].startsWith("-")) { + if (xargsArgs[0] === "-r") { + xargsArgs.shift(); + continue; + } + return null; + } + const grepCmd = xargsArgs.join(" "); + const grepParams = parseBashGrep(grepCmd); + if (!grepParams) return null; + let lineLimit = 0; + if (pipeline.length === 3) { + const headStage = pipeline[2].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return 
null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams, lineLimit }; + } + + if (patterns.length !== 1) return null; + return { kind: "find", dir, pattern: patterns[0], countOnly }; + } + + const grepParams = parseBashGrep(clean); + if (grepParams) { + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return null; + lineLimit = headTail.lineLimit; + } + return { kind: "grep", params: grepParams, lineLimit }; + } + + return null; +} + +export function parseCompiledBashCommand(cmd: string): CompiledSegment[] | null { + if (cmd.includes("||")) return null; + const segments = splitTopLevel(cmd, ["&&", ";", "\n"]); + if (!segments || segments.length === 0) return null; + const parsed = segments.map(parseCompiledSegment); + if (parsed.some((segment) => segment === null)) return null; + return parsed as CompiledSegment[]; +} + +function applyLineWindow(content: string, lineLimit: number, fromEnd: boolean): string { + if (lineLimit <= 0) return content; + const lines = content.split("\n"); + return (fromEnd ? lines.slice(-lineLimit) : lines.slice(0, lineLimit)).join("\n"); +} + +function countLines(content: string): number { + return content === "" ? 0 : content.split("\n").length; +} + +function renderDirectoryListing(dir: string, rows: VirtualRow[], longFormat: boolean): string { + const entries = new Map(); + const prefix = dir === "/" ? "/" : `${dir}/`; + for (const row of rows) { + const path = row["path"] as string; + if (!path.startsWith(prefix) && dir !== "/") continue; + const rest = dir === "/" ? path.slice(1) : path.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? 
rest : rest.slice(0, slash); + if (!name) continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: Number(row["size_bytes"] ?? 0) }); + } + } + if (entries.size === 0) return `ls: cannot access '${dir}': No such file or directory`; + + const lines: string[] = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? "/" : "")); + } + } + return lines.join("\n"); +} + +interface ExecuteCompiledBashDeps { + readVirtualPathContentsFn?: typeof readVirtualPathContents; + listVirtualPathRowsForDirsFn?: typeof listVirtualPathRowsForDirs; + findVirtualPathsFn?: typeof findVirtualPaths; + handleGrepDirectFn?: typeof handleGrepDirect; +} + +export async function executeCompiledBashCommand( + api: DeeplakeApi, + memoryTable: string, + sessionsTable: string, + cmd: string, + deps: ExecuteCompiledBashDeps = {}, +): Promise { + const { + readVirtualPathContentsFn = readVirtualPathContents, + listVirtualPathRowsForDirsFn = listVirtualPathRowsForDirs, + findVirtualPathsFn = findVirtualPaths, + handleGrepDirectFn = handleGrepDirect, + } = deps; + + const plan = parseCompiledBashCommand(cmd); + if (!plan) return null; + + const readPaths = [...new Set(plan.flatMap((segment) => segment.kind === "cat" ? segment.paths : []))]; + const listDirs = [...new Set(plan.flatMap((segment) => segment.kind === "ls" ? segment.dirs.map(dir => dir.replace(/\/+$/, "") || "/") : []))]; + + const contentMap = readPaths.length > 0 + ? await readVirtualPathContentsFn(api, memoryTable, sessionsTable, readPaths) + : new Map(); + const dirRowsMap = listDirs.length > 0 + ? 
await listVirtualPathRowsForDirsFn(api, memoryTable, sessionsTable, listDirs) + : new Map(); + + const outputs: string[] = []; + for (const segment of plan) { + if (segment.kind === "echo") { + outputs.push(segment.text); + continue; + } + + if (segment.kind === "cat") { + const contents: string[] = []; + for (const path of segment.paths) { + const content = contentMap.get(path) ?? null; + if (content === null) { + if (segment.ignoreMissing) continue; + return null; + } + contents.push(content); + } + const combined = contents.join(""); + if (segment.countLines) { + outputs.push(`${countLines(combined)} ${segment.paths[0]}`); + } else { + outputs.push(applyLineWindow(combined, segment.lineLimit, segment.fromEnd)); + } + continue; + } + + if (segment.kind === "ls") { + for (const dir of segment.dirs) { + outputs.push(renderDirectoryListing(dir.replace(/\/+$/, "") || "/", dirRowsMap.get(dir.replace(/\/+$/, "") || "/") ?? [], segment.longFormat)); + } + continue; + } + + if (segment.kind === "find") { + const filenamePattern = sqlLike(segment.pattern).replace(/\*/g, "%").replace(/\?/g, "_"); + const paths = await findVirtualPathsFn(api, memoryTable, sessionsTable, segment.dir.replace(/\/+$/, "") || "/", filenamePattern); + outputs.push(segment.countOnly ? 
String(paths.length) : (paths.join("\n") || "(no matches)")); + continue; + } + + if (segment.kind === "find_grep") { + const dir = segment.dir.replace(/\/+$/, "") || "/"; + const candidateBatches = await Promise.all( + segment.patterns.map((pattern) => + findVirtualPathsFn( + api, + memoryTable, + sessionsTable, + dir, + sqlLike(pattern).replace(/\*/g, "%").replace(/\?/g, "_"), + ), + ), + ); + const candidatePaths = [...new Set(candidateBatches.flat())]; + if (candidatePaths.length === 0) { + outputs.push("(no matches)"); + continue; + } + const candidateContents = await readVirtualPathContentsFn(api, memoryTable, sessionsTable, candidatePaths); + const matched = refineGrepMatches( + candidatePaths.flatMap((path) => { + const content = candidateContents.get(path); + if (content === null || content === undefined) return []; + return [{ path, content: normalizeContent(path, content) }]; + }), + segment.params, + ); + const limited = segment.lineLimit > 0 ? matched.slice(0, segment.lineLimit) : matched; + outputs.push(limited.join("\n") || "(no matches)"); + continue; + } + + if (segment.kind === "grep") { + const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); + if (result === null) return null; + if (segment.lineLimit > 0) { + outputs.push(result.split("\n").slice(0, segment.lineLimit).join("\n")); + } else { + outputs.push(result); + } + continue; + } + } + + return capOutputForClaude(outputs.join("\n"), { kind: "bash" }); +} diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 1d0904d..3b1aacd 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -4,74 +4,49 @@ * Codex PreToolUse hook — intercepts Bash commands targeting ~/.deeplake/memory/. 
* * Strategy: "block + inject" - * Codex 0.118.0 doesn't parse JSON hook output, but supports: - * - stderr + exit code 2 → blocks the command, stderr becomes model feedback - * - plain text stdout → adds context (command still runs) - * - exit 0 + no output → pass through + * Codex does not parse JSON hook output here, so the CLI wrapper still maps: + * - action=pass -> exit 0, no output + * - action=guide -> stdout guidance, exit 0 + * - action=block -> stderr content, exit 2 * - * When we detect a memory-targeting command, we: - * 1. Fetch the real content from the cloud (SQL or virtual shell) - * 2. Block the command (exit 2) and return the content via stderr - * 3. The model receives the cloud content as if the command ran - * - * Codex input: { session_id, tool_name, tool_use_id, tool_input: { command }, cwd, ... } + * The source logic is exported so tests can exercise it directly without + * spawning the bundled script in a subprocess. */ -import { existsSync } from "node:fs"; import { execFileSync } from "node:child_process"; -import { join } from "node:path"; -import { homedir } from "node:os"; +import { existsSync } from "node:fs"; +import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; -import { dirname } from "node:path"; import { readStdin } from "../../utils/stdin.js"; import { loadConfig } from "../../config.js"; import { DeeplakeApi } from "../../deeplake-api.js"; -import { sqlStr, sqlLike } from "../../utils/sql.js"; +import { sqlLike } from "../../utils/sql.js"; import { parseBashGrep, handleGrepDirect } from "../grep-direct.js"; - +import { executeCompiledBashCommand } from "../bash-command-compiler.js"; +import { + findVirtualPaths, + readVirtualPathContents, + listVirtualPathRows, + readVirtualPathContent, +} from "../virtual-table-query.js"; +import { + readCachedIndexContent, + writeCachedIndexContent, +} from "../query-cache.js"; import { log as _log } from "../../utils/debug.js"; -const log = (msg: string) => 
_log("codex-pre", msg); +import { isDirectRun } from "../../utils/direct-run.js"; +import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; -const MEMORY_PATH = join(homedir(), ".deeplake", "memory"); -const TILDE_PATH = "~/.deeplake/memory"; -const HOME_VAR_PATH = "$HOME/.deeplake/memory"; +export { isSafe, touchesMemory, rewritePaths }; + +const log = (msg: string) => _log("codex-pre", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); const SHELL_BUNDLE = existsSync(join(__bundleDir, "shell", "deeplake-shell.js")) ? join(__bundleDir, "shell", "deeplake-shell.js") : join(__bundleDir, "..", "shell", "deeplake-shell.js"); -// Safe builtins that can run against the virtual FS -const SAFE_BUILTINS = new Set([ - "cat", "ls", "cp", "mv", "rm", "rmdir", "mkdir", "touch", "ln", "chmod", - "stat", "readlink", "du", "tree", "file", - "grep", "egrep", "fgrep", "rg", "sed", "awk", "cut", "tr", "sort", "uniq", - "wc", "head", "tail", "tac", "rev", "nl", "fold", "expand", "unexpand", - "paste", "join", "comm", "column", "diff", "strings", "split", - "find", "xargs", "which", - "jq", "yq", "xan", "base64", "od", - "tar", "gzip", "gunzip", "zcat", - "md5sum", "sha1sum", "sha256sum", - "echo", "printf", "tee", - "pwd", "cd", "basename", "dirname", "env", "printenv", "hostname", "whoami", - "date", "seq", "expr", "sleep", "timeout", "time", "true", "false", "test", - "alias", "unalias", "history", "help", "clear", - "for", "while", "do", "done", "if", "then", "else", "fi", "case", "esac", -]); - -function isSafe(cmd: string): boolean { - // Reject command/process substitution before checking tokens - if (/\$\(|`|<\(/.test(cmd)) return false; - const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); - for (const stage of stages) { - const firstToken = stage.trim().split(/\s+/)[0] ?? 
""; - if (firstToken && !SAFE_BUILTINS.has(firstToken)) return false; - } - return true; -} - -interface CodexPreToolUseInput { +export interface CodexPreToolUseInput { session_id: string; tool_name: string; tool_use_id: string; @@ -82,168 +57,239 @@ interface CodexPreToolUseInput { turn_id?: string; } -function touchesMemory(cmd: string): boolean { - return cmd.includes(MEMORY_PATH) || cmd.includes(TILDE_PATH) || cmd.includes(HOME_VAR_PATH); -} - -function rewritePaths(cmd: string): string { - return cmd - .replace(new RegExp(MEMORY_PATH.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/?", "g"), "/") - .replace(/~\/.deeplake\/memory\/?/g, "/") - .replace(/\$HOME\/.deeplake\/memory\/?/g, "/") - .replace(/"\$HOME\/.deeplake\/memory\/?"/g, '"/"'); +export interface CodexPreToolDecision { + action: "pass" | "guide" | "block"; + output?: string; + rewrittenCommand?: string; } -/** Block the command and return content to the model via stderr + exit 2. */ -function blockWithContent(content: string): never { - process.stderr.write(content); - process.exit(2); +export function buildUnsupportedGuidance(): string { + return "This command is not supported for ~/.deeplake/memory/ operations. " + + "Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. " + + "Do NOT use python, python3, node, curl, or other interpreters. " + + "Rewrite your command using only bash tools and retry."; } -/** Run a command through the virtual shell and return the output. 
*/ -function runVirtualShell(cmd: string): string { +export function runVirtualShell(cmd: string, shellBundle = SHELL_BUNDLE, logFn: (msg: string) => void = log): string { try { - return execFileSync("node", [SHELL_BUNDLE, "-c", cmd], { + return execFileSync("node", [shellBundle, "-c", cmd], { encoding: "utf-8", timeout: 10_000, env: { ...process.env }, - stdio: ["pipe", "pipe", "pipe"], // capture stderr instead of inheriting + stdio: ["pipe", "pipe", "pipe"], }).trim(); } catch (e: any) { - log(`virtual shell failed: ${e.message}`); + logFn(`virtual shell failed: ${e.message}`); return ""; } } -async function main(): Promise { - const input = await readStdin(); +function buildIndexContent(rows: Record[]): string { + const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; + for (const row of rows) { + const path = row["path"] as string; + const project = row["project"] as string || ""; + const description = (row["description"] as string || "").slice(0, 120); + const date = (row["creation_date"] as string || "").slice(0, 10); + lines.push(`- [${path}](${path}) ${date} ${project ? 
`[${project}]` : ""} ${description}`); + } + return lines.join("\n"); +} + +interface CodexPreToolDeps { + config?: ReturnType; + createApi?: (table: string, config: NonNullable>) => DeeplakeApi; + executeCompiledBashCommandFn?: typeof executeCompiledBashCommand; + readVirtualPathContentsFn?: typeof readVirtualPathContents; + readVirtualPathContentFn?: typeof readVirtualPathContent; + listVirtualPathRowsFn?: typeof listVirtualPathRows; + findVirtualPathsFn?: typeof findVirtualPaths; + handleGrepDirectFn?: typeof handleGrepDirect; + readCachedIndexContentFn?: typeof readCachedIndexContent; + writeCachedIndexContentFn?: typeof writeCachedIndexContent; + runVirtualShellFn?: typeof runVirtualShell; + shellBundle?: string; + logFn?: (msg: string) => void; +} + +export async function processCodexPreToolUse( + input: CodexPreToolUseInput, + deps: CodexPreToolDeps = {}, +): Promise { + const { + config = loadConfig(), + createApi = (table, activeConfig) => new DeeplakeApi( + activeConfig.token, + activeConfig.apiUrl, + activeConfig.orgId, + activeConfig.workspaceId, + table, + ), + executeCompiledBashCommandFn = executeCompiledBashCommand, + readVirtualPathContentsFn = readVirtualPathContents, + readVirtualPathContentFn = readVirtualPathContent, + listVirtualPathRowsFn = listVirtualPathRows, + findVirtualPathsFn = findVirtualPaths, + handleGrepDirectFn = handleGrepDirect, + readCachedIndexContentFn = readCachedIndexContent, + writeCachedIndexContentFn = writeCachedIndexContent, + runVirtualShellFn = runVirtualShell, + shellBundle = SHELL_BUNDLE, + logFn = log, + } = deps; + const cmd = input.tool_input?.command ?? ""; - log(`hook fired: cmd=${cmd}`); + logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd)) return; + if (!touchesMemory(cmd)) return { action: "pass" }; const rewritten = rewritePaths(cmd); - if (!isSafe(rewritten)) { - // Instead of hard-blocking (exit code 2), output guidance so the agent self-corrects. 
- const guidance = "This command is not supported for ~/.deeplake/memory/ operations. " + - "Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. " + - "Do NOT use python, python3, node, curl, or other interpreters. " + - "Rewrite your command using only bash tools and retry."; - log(`unsupported command, returning guidance: ${rewritten}`); - process.stdout.write(guidance); - process.exit(0); + const guidance = buildUnsupportedGuidance(); + logFn(`unsupported command, returning guidance: ${rewritten}`); + return { + action: "guide", + output: guidance, + rewrittenCommand: rewritten, + }; } - // ── Fast path: handle grep and cat directly via SQL ── - const config = loadConfig(); if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); + const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; + const api = createApi(table, config); + + const readVirtualPathContentsWithCache = async ( + cachePaths: string[], + ): Promise> => { + const uniquePaths = [...new Set(cachePaths)]; + const result = new Map(uniquePaths.map((path) => [path, null])); + const cachedIndex = uniquePaths.includes("/index.md") + ? readCachedIndexContentFn(input.session_id) + : null; + + const remainingPaths = cachedIndex === null + ? 
uniquePaths + : uniquePaths.filter((path) => path !== "/index.md"); + + if (cachedIndex !== null) { + result.set("/index.md", cachedIndex); + } + + if (remainingPaths.length > 0) { + const fetched = await readVirtualPathContentsFn(api, table, sessionsTable, remainingPaths); + for (const [path, content] of fetched) result.set(path, content); + } + + const fetchedIndex = result.get("/index.md"); + if (typeof fetchedIndex === "string") { + writeCachedIndexContentFn(input.session_id, fetchedIndex); + } + + return result; + }; try { - // Detect: cat/head/tail/wc — read a single file - { - let virtualPath: string | null = null; - let lineLimit = 0; - let fromEnd = false; - - // cat [2>/dev/null] [| head -N] - const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); - const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); - if (catPipeHead) { virtualPath = catPipeHead[1]; lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); } - if (!virtualPath) { - const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); - if (catMatch) virtualPath = catMatch[1]; - } - // head [-n] N - if (!virtualPath) { - const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
- rewritten.match(/^head\s+(\S+)\s*$/); - if (headMatch) { - if (headMatch[2]) { virtualPath = headMatch[2]; lineLimit = Math.abs(parseInt(headMatch[1], 10)); } - else { virtualPath = headMatch[1]; lineLimit = 10; } + const compiled = await executeCompiledBashCommandFn(api, table, sessionsTable, rewritten, { + readVirtualPathContentsFn: async (_api, _memoryTable, _sessionsTable, cachePaths) => readVirtualPathContentsWithCache(cachePaths), + }); + if (compiled !== null) { + return { action: "block", output: compiled, rewrittenCommand: rewritten }; + } + + let virtualPath: string | null = null; + let lineLimit = 0; + let fromEnd = false; + + const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { + virtualPath = catPipeHead[1]; + lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); + } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) + ?? rewritten.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { + virtualPath = headMatch[2]; + lineLimit = Math.abs(parseInt(headMatch[1], 10)); + } else { + virtualPath = headMatch[1]; + lineLimit = 10; } } - // tail [-n] N - if (!virtualPath) { - const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? - rewritten.match(/^tail\s+(\S+)\s*$/); - if (tailMatch) { - fromEnd = true; - if (tailMatch[2]) { virtualPath = tailMatch[2]; lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } - else { virtualPath = tailMatch[1]; lineLimit = 10; } + } + if (!virtualPath) { + const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) + ?? 
rewritten.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { + virtualPath = tailMatch[2]; + lineLimit = Math.abs(parseInt(tailMatch[1], 10)); + } else { + virtualPath = tailMatch[1]; + lineLimit = 10; } } - // wc -l - if (!virtualPath) { - const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); - if (wcMatch) { virtualPath = wcMatch[1]; lineLimit = -1; } + } + if (!virtualPath) { + const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { + virtualPath = wcMatch[1]; + lineLimit = -1; } + } - if (virtualPath && !virtualPath.endsWith("/")) { - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; - const isSession = virtualPath.startsWith("/sessions/"); - log(`direct read: ${virtualPath}`); - - let content: string | null = null; - if (isSession) { - const rows = await api.query( - `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (rows.length > 0 && rows[0]["content"]) content = rows[0]["content"] as string; - } else { - const rows = await api.query( - `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (rows.length > 0 && rows[0]["summary"]) { - content = rows[0]["summary"] as string; - } else if (virtualPath === "/index.md") { - // Virtual index — generate from metadata - const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` - ); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"] as string; - const proj = r["project"] as string || ""; - const desc = (r["description"] as string || "").slice(0, 120); - const date = (r["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? 
`[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); - } - } + if (virtualPath && !virtualPath.endsWith("/")) { + logFn(`direct read: ${virtualPath}`); + let content = virtualPath === "/index.md" + ? readCachedIndexContentFn(input.session_id) + : null; + if (content === null) { + content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); + } + if (content === null && virtualPath === "/index.md") { + const idxRows = await api.query( + `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + ); + content = buildIndexContent(idxRows); + } - if (content !== null) { - if (lineLimit === -1) { - blockWithContent(`${content.split("\n").length} ${virtualPath}`); - } - if (lineLimit > 0) { - const lines = content.split("\n"); - content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); - } - blockWithContent(content); + if (content !== null) { + if (virtualPath === "/index.md") { + writeCachedIndexContentFn(input.session_id, content); + } + if (lineLimit === -1) { + return { action: "block", output: `${content.split("\n").length} ${virtualPath}`, rewrittenCommand: rewritten }; + } + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd + ? lines.slice(-lineLimit).join("\n") + : lines.slice(0, lineLimit).join("\n"); } + return { action: "block", output: content, rewrittenCommand: rewritten }; } } - // Detect: ls [-alh...] const lsMatch = rewritten.match(/^ls\s+(?:-[a-zA-Z]+\s+)*(\S+)?\s*$/); if (lsMatch) { const dir = (lsMatch[1] ?? "/").replace(/\/+$/, "") || "/"; const isLong = /\s-[a-zA-Z]*l/.test(rewritten); - log(`direct ls: ${dir}`); - const rows = await api.query( - `SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? 
"" : dir)}/%' ORDER BY path` - ); - // Build directory listing from paths + logFn(`direct ls: ${dir}`); + const rows = await listVirtualPathRowsFn(api, table, sessionsTable, dir); const entries = new Map(); - const prefix = dir === "/" ? "/" : dir + "/"; + const prefix = dir === "/" ? "/" : `${dir}/`; for (const row of rows) { - const p = row["path"] as string; - if (!p.startsWith(prefix) && dir !== "/") continue; - const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const path = row["path"] as string; + if (!path.startsWith(prefix) && dir !== "/") continue; + const rest = dir === "/" ? path.slice(1) : path.slice(prefix.length); const slash = rest.indexOf("/"); const name = slash === -1 ? rest : rest.slice(0, slash); if (!name) continue; @@ -254,6 +300,7 @@ async function main(): Promise { entries.set(name, { isDir: false, size: (row["size_bytes"] as number) ?? 0 }); } } + if (entries.size > 0) { const lines: string[] = []; for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { @@ -265,57 +312,68 @@ async function main(): Promise { lines.push(name + (info.isDir ? "/" : "")); } } - blockWithContent(lines.join("\n")); - } else { - blockWithContent(`ls: cannot access '${dir}': No such file or directory`); + return { action: "block", output: lines.join("\n"), rewrittenCommand: rewritten }; } + + return { + action: "block", + output: `ls: cannot access '${dir}': No such file or directory`, + rewrittenCommand: rewritten, + }; } - // Detect: find -name '' - { - const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); - if (findMatch) { - const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const findTable = isSessionDir ? 
sessionsTable : table; - log(`direct find: ${dir} -name '${findMatch[2]}'`); - const rows = await api.query( - `SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path` - ); - let result = rows.map(r => r["path"] as string).join("\n") || ""; - if (/\|\s*wc\s+-l\s*$/.test(rewritten)) { - result = String(rows.length); - } - blockWithContent(result || "(no matches)"); - } + const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + logFn(`direct find: ${dir} -name '${findMatch[2]}'`); + const paths = await findVirtualPathsFn(api, table, sessionsTable, dir, namePattern); + let result = paths.join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(rewritten)) result = String(paths.length); + return { + action: "block", + output: result || "(no matches)", + rewrittenCommand: rewritten, + }; } - // Detect: grep/egrep/fgrep with all flags const grepParams = parseBashGrep(rewritten); if (grepParams) { - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? 
"sessions"; - log(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); - const result = await handleGrepDirect(api, table, sessionsTable, grepParams); + logFn(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirectFn(api, table, sessionsTable, grepParams); if (result !== null) { - blockWithContent(result); + return { action: "block", output: result, rewrittenCommand: rewritten }; } } } catch (e: any) { - log(`direct query failed, falling back to shell: ${e.message}`); + logFn(`direct query failed, falling back to shell: ${e.message}`); } } - // ── Fallback: run through virtual shell, return output ── - log(`intercepted → running via virtual shell: ${rewritten}`); - const result = runVirtualShell(rewritten); + logFn(`intercepted → running via virtual shell: ${rewritten}`); + const result = runVirtualShellFn(rewritten, shellBundle, logFn); + return { + action: "block", + output: result || "[Deeplake Memory] Command returned empty or the file does not exist in cloud storage.", + rewrittenCommand: rewritten, + }; +} + +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + const decision = await processCodexPreToolUse(input); - if (result) { - blockWithContent(result); - } else { - blockWithContent("[Deeplake Memory] Command returned empty or the file does not exist in cloud storage."); + if (decision.action === "pass") return; + if (decision.action === "guide") { + if (decision.output) process.stdout.write(decision.output); + process.exit(0); } + if (decision.output) process.stderr.write(decision.output); + process.exit(2); } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index 93a4561..95e15d9 100644 --- 
a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -7,6 +7,7 @@ import type { DeeplakeApi } from "../deeplake-api.js"; import { grepBothTables, type GrepMatchParams } from "../shell/grep-core.js"; +import { capOutputForClaude } from "../utils/output-cap.js"; export interface GrepParams { pattern: string; @@ -20,53 +21,142 @@ export interface GrepParams { fixedString: boolean; } +function splitFirstPipelineStage(cmd: string): string | null { + const input = cmd.trim(); + let quote: "'" | "\"" | null = null; + let escaped = false; + + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (escaped) { + escaped = false; + continue; + } + if (quote) { + if (ch === quote) { + quote = null; + continue; + } + if (ch === "\\" && quote === "\"") { + escaped = true; + } + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "'" || ch === "\"") { + quote = ch; + continue; + } + if (ch === "|") return input.slice(0, i).trim(); + } + + return quote ? null : input; +} + +function tokenizeGrepStage(input: string): string[] | null { + const tokens: string[] = []; + let current = ""; + let quote: "'" | "\"" | null = null; + + for (let i = 0; i < input.length; i++) { + const ch = input[i]; + if (quote) { + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === "\"" && i + 1 < input.length) { + current += input[++i]; + } else { + current += ch; + } + continue; + } + + if (ch === "'" || ch === "\"") { + quote = ch; + continue; + } + if (ch === "\\" && i + 1 < input.length) { + current += input[++i]; + continue; + } + if (/\s/.test(ch)) { + if (current) { + tokens.push(current); + current = ""; + } + continue; + } + current += ch; + } + + if (quote) return null; + if (current) tokens.push(current); + return tokens; +} + /** Parse a bash grep/egrep/fgrep command string into GrepParams. 
*/ export function parseBashGrep(cmd: string): GrepParams | null { - const first = cmd.trim().split(/\s*\|\s*/)[0]; + const first = splitFirstPipelineStage(cmd); + if (!first) return null; if (!/^(grep|egrep|fgrep)\b/.test(first)) return null; const isFixed = first.startsWith("fgrep"); - // Tokenize respecting single/double quotes - const tokens: string[] = []; - let pos = 0; - while (pos < first.length) { - if (first[pos] === " " || first[pos] === "\t") { pos++; continue; } - if (first[pos] === "'" || first[pos] === '"') { - const q = first[pos]; - let end = pos + 1; - while (end < first.length && first[end] !== q) end++; - tokens.push(first.slice(pos + 1, end)); - pos = end + 1; - } else { - let end = pos; - while (end < first.length && first[end] !== " " && first[end] !== "\t") end++; - tokens.push(first.slice(pos, end)); - pos = end; - } - } + const tokens = tokenizeGrepStage(first); + if (!tokens || tokens.length === 0) return null; let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + const explicitPatterns: string[] = []; let ti = 1; - while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { - const flag = tokens[ti]; - if (flag.startsWith("--")) { - const handlers: Record void> = { - "--ignore-case": () => { ignoreCase = true; }, - "--word-regexp": () => { wordMatch = true; }, - "--files-with-matches": () => { filesOnly = true; }, - "--count": () => { countOnly = true; }, - "--line-number": () => { lineNumber = true; }, - "--invert-match": () => { invertMatch = true; }, - "--fixed-strings": () => { fixedString = true; }, + while (ti < tokens.length) { + const token = tokens[ti]; + if (token === "--") { + ti++; + break; + } + if (!token.startsWith("-") || token === "-") break; + + if (token.startsWith("--")) { + const [flag, inlineValue] = token.split("=", 2); + const handlers: Record boolean> = { + "--ignore-case": () => { ignoreCase = true; 
return false; }, + "--word-regexp": () => { wordMatch = true; return false; }, + "--files-with-matches": () => { filesOnly = true; return false; }, + "--count": () => { countOnly = true; return false; }, + "--line-number": () => { lineNumber = true; return false; }, + "--invert-match": () => { invertMatch = true; return false; }, + "--fixed-strings": () => { fixedString = true; return false; }, + "--after-context": () => inlineValue === undefined, + "--before-context": () => inlineValue === undefined, + "--context": () => inlineValue === undefined, + "--max-count": () => inlineValue === undefined, + "--regexp": () => { + if (inlineValue !== undefined) { + explicitPatterns.push(inlineValue); + return false; + } + return true; + }, }; - handlers[flag]?.(); - ti++; continue; + const consumeNext = handlers[flag]?.() ?? false; + if (consumeNext) { + ti++; + if (ti >= tokens.length) return null; + if (flag === "--regexp") explicitPatterns.push(tokens[ti]); + } + ti++; + continue; } - for (const c of flag.slice(1)) { - switch (c) { + + const shortFlags = token.slice(1); + for (let i = 0; i < shortFlags.length; i++) { + const flag = shortFlags[i]; + switch (flag) { case "i": ignoreCase = true; break; case "w": wordMatch = true; break; case "l": filesOnly = true; break; @@ -74,19 +164,47 @@ export function parseBashGrep(cmd: string): GrepParams | null { case "n": lineNumber = true; break; case "v": invertMatch = true; break; case "F": fixedString = true; break; - // r/R/E: no-op (recursive implied, extended default) + case "r": + case "R": + case "E": + break; + case "A": + case "B": + case "C": + case "m": + if (i === shortFlags.length - 1) { + ti++; + if (ti >= tokens.length) return null; + } + i = shortFlags.length; + break; + case "e": { + const inlineValue = shortFlags.slice(i + 1); + if (inlineValue) { + explicitPatterns.push(inlineValue); + } else { + ti++; + if (ti >= tokens.length) return null; + explicitPatterns.push(tokens[ti]); + } + i = shortFlags.length; + 
break; + } + default: + break; } } ti++; } - if (ti < tokens.length && tokens[ti] === "--") ti++; - if (ti >= tokens.length) return null; - let target = tokens[ti + 1] ?? "/"; + const pattern = explicitPatterns.length > 0 ? explicitPatterns[0] : tokens[ti]; + if (!pattern) return null; + + let target = explicitPatterns.length > 0 ? (tokens[ti] ?? "/") : (tokens[ti + 1] ?? "/"); if (target === "." || target === "./") target = "/"; return { - pattern: tokens[ti], targetPath: target, + pattern, targetPath: target, ignoreCase, wordMatch, filesOnly, countOnly, lineNumber, invertMatch, fixedString, }; } @@ -112,5 +230,6 @@ export async function handleGrepDirect( }; const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); - return output.join("\n") || "(no matches)"; + const joined = output.join("\n") || "(no matches)"; + return capOutputForClaude(joined, { kind: "grep" }); } diff --git a/src/hooks/memory-path-utils.ts b/src/hooks/memory-path-utils.ts new file mode 100644 index 0000000..b741cb3 --- /dev/null +++ b/src/hooks/memory-path-utils.ts @@ -0,0 +1,46 @@ +import { homedir } from "node:os"; +import { join } from "node:path"; + +export const MEMORY_PATH = join(homedir(), ".deeplake", "memory"); +export const TILDE_PATH = "~/.deeplake/memory"; +export const HOME_VAR_PATH = "$HOME/.deeplake/memory"; + +export const SAFE_BUILTINS = new Set([ + "cat", "ls", "cp", "mv", "rm", "rmdir", "mkdir", "touch", "ln", "chmod", + "stat", "readlink", "du", "tree", "file", + "grep", "egrep", "fgrep", "rg", "sed", "awk", "cut", "tr", "sort", "uniq", + "wc", "head", "tail", "tac", "rev", "nl", "fold", "expand", "unexpand", + "paste", "join", "comm", "column", "diff", "strings", "split", + "find", "xargs", "which", + "jq", "yq", "xan", "base64", "od", + "tar", "gzip", "gunzip", "zcat", + "md5sum", "sha1sum", "sha256sum", + "echo", "printf", "tee", + "pwd", "cd", "basename", "dirname", "env", "printenv", "hostname", "whoami", + "date", "seq", "expr", 
"sleep", "timeout", "time", "true", "false", "test", + "alias", "unalias", "history", "help", "clear", + "for", "while", "do", "done", "if", "then", "else", "fi", "case", "esac", +]); + +export function isSafe(cmd: string): boolean { + if (/\$\(|`|<\(/.test(cmd)) return false; + const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); + const stages = stripped.split(/\||;|&&|\|\||\n/); + for (const stage of stages) { + const firstToken = stage.trim().split(/\s+/)[0] ?? ""; + if (firstToken && !SAFE_BUILTINS.has(firstToken)) return false; + } + return true; +} + +export function touchesMemory(p: string): boolean { + return p.includes(MEMORY_PATH) || p.includes(TILDE_PATH) || p.includes(HOME_VAR_PATH); +} + +export function rewritePaths(cmd: string): string { + return cmd + .replace(new RegExp(MEMORY_PATH.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/?", "g"), "/") + .replace(/~\/.deeplake\/memory\/?/g, "/") + .replace(/\$HOME\/.deeplake\/memory\/?/g, "/") + .replace(/"\$HOME\/.deeplake\/memory\/?"/g, '"/"'); +} diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index cad65fc..34c45db 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -1,90 +1,108 @@ #!/usr/bin/env node -import { existsSync } from "node:fs"; -import { join } from "node:path"; +import { existsSync, mkdirSync, writeFileSync } from "node:fs"; import { homedir } from "node:os"; +import { join, dirname, sep } from "node:path"; import { fileURLToPath } from "node:url"; -import { dirname } from "node:path"; import { readStdin } from "../utils/stdin.js"; import { loadConfig } from "../config.js"; import { DeeplakeApi } from "../deeplake-api.js"; -import { sqlStr, sqlLike } from "../utils/sql.js"; +import { sqlLike } from "../utils/sql.js"; +import { log as _log } from "../utils/debug.js"; +import { isDirectRun } from "../utils/direct-run.js"; import { type GrepParams, parseBashGrep, handleGrepDirect } from "./grep-direct.js"; +import { 
executeCompiledBashCommand } from "./bash-command-compiler.js"; +import { + findVirtualPaths, + readVirtualPathContents, + listVirtualPathRows, + readVirtualPathContent, +} from "./virtual-table-query.js"; +import { + readCachedIndexContent, + writeCachedIndexContent, +} from "./query-cache.js"; +import { isSafe, touchesMemory, rewritePaths } from "./memory-path-utils.js"; +import { capOutputForClaude } from "../utils/output-cap.js"; + +export { isSafe, touchesMemory, rewritePaths }; -import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("pre", msg); -const MEMORY_PATH = join(homedir(), ".deeplake", "memory"); -const TILDE_PATH = "~/.deeplake/memory"; -const HOME_VAR_PATH = "$HOME/.deeplake/memory"; - const __bundleDir = dirname(fileURLToPath(import.meta.url)); const SHELL_BUNDLE = existsSync(join(__bundleDir, "shell", "deeplake-shell.js")) ? join(__bundleDir, "shell", "deeplake-shell.js") : join(__bundleDir, "..", "shell", "deeplake-shell.js"); -// All commands supported by just-bash + shell control flow -const SAFE_BUILTINS = new Set([ - // filesystem - "cat", "ls", "cp", "mv", "rm", "rmdir", "mkdir", "touch", "ln", "chmod", - "stat", "readlink", "du", "tree", "file", - // text processing - "grep", "egrep", "fgrep", "rg", "sed", "awk", "cut", "tr", "sort", "uniq", - "wc", "head", "tail", "tac", "rev", "nl", "fold", "expand", "unexpand", - "paste", "join", "comm", "column", "diff", "strings", "split", - // search - "find", "xargs", "which", - // data formats - "jq", "yq", "xan", "base64", "od", - // archives - "tar", "gzip", "gunzip", "zcat", - // hashing - "md5sum", "sha1sum", "sha256sum", - // output/io - "echo", "printf", "tee", "cat", - // path/env - "pwd", "cd", "basename", "dirname", "env", "printenv", "hostname", "whoami", - // misc - "date", "seq", "expr", "sleep", "timeout", "time", "true", "false", "test", - "alias", "unalias", "history", "help", "clear", - // shell control flow - "for", "while", "do", "done", "if", "then", 
"else", "fi", "case", "esac", -]); - -function isSafe(cmd: string): boolean { - // Reject command/process substitution before checking tokens - if (/\$\(|`|<\(/.test(cmd)) return false; - // Strip quoted strings before splitting on pipes — prevents splitting - // inside jq expressions like 'select(.type) | .content' - const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); - for (const stage of stages) { - const firstToken = stage.trim().split(/\s+/)[0] ?? ""; - if (firstToken && !SAFE_BUILTINS.has(firstToken)) return false; - } - return true; -} - -interface PreToolUseInput { +export interface PreToolUseInput { session_id: string; tool_name: string; tool_input: Record; tool_use_id: string; } -function touchesMemory(p: string): boolean { - return p.includes(MEMORY_PATH) || p.includes(TILDE_PATH) || p.includes(HOME_VAR_PATH); +export interface ClaudePreToolDecision { + command: string; + description: string; + /** + * When set, main() emits the hook response as `updatedInput: {file_path}` + * instead of `updatedInput: {command, description}`. This is required for + * Read-tool intercepts: Claude Code's Read implementation reads + * `updatedInput.file_path` and errors with "path must be of type string, + * got undefined" if the hook hands it the Bash-shaped input. + */ + file_path?: string; } -function rewritePaths(cmd: string): string { - return cmd - .replace(new RegExp(MEMORY_PATH.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/?", "g"), "/") - .replace(/~\/.deeplake\/memory\/?/g, "/") - .replace(/\$HOME\/.deeplake\/memory\/?/g, "/") - .replace(/"\$HOME\/.deeplake\/memory\/?"/g, '"/"'); +const READ_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); + +/** + * Materialize fetched content for a Read intercept into a real file on disk + * so Claude Code's Read tool can read it via `updatedInput.file_path`. 
The + * file lives under `~/.deeplake/query-cache//read/` and mirrors + * the virtual path structure (e.g. `/sessions/conv_0_session_1.json` → + * `.../read/sessions/conv_0_session_1.json`). Per-session dirs are cleaned + * alongside the index cache at session end. + */ +export function writeReadCacheFile( + sessionId: string, + virtualPath: string, + content: string, + deps: { cacheRoot?: string } = {}, +): string { + const { cacheRoot = READ_CACHE_ROOT } = deps; + const safeSessionId = sessionId.replace(/[^a-zA-Z0-9._-]/g, "_") || "unknown"; + const rel = virtualPath.replace(/^\/+/, "") || "content"; + const expectedRoot = join(cacheRoot, safeSessionId, "read"); + const absPath = join(expectedRoot, rel); + // Containment guard: if the DB-derived virtualPath contains `..` segments, + // `join` resolves them and absPath can escape the per-session cache dir. + // Refuse the write rather than silently writing outside the sandbox. + if (absPath !== expectedRoot && !absPath.startsWith(expectedRoot + sep)) { + throw new Error(`writeReadCacheFile: path escapes cache root: ${absPath}`); + } + mkdirSync(dirname(absPath), { recursive: true }); + writeFileSync(absPath, content, "utf-8"); + return absPath; } -function getShellCommand(toolName: string, toolInput: Record): string | null { +export function buildReadDecision(file_path: string, description: string): ClaudePreToolDecision { + return { command: "", description, file_path }; +} + +function getReadTargetPath(toolInput: Record): string | null { + const rawPath = (toolInput.file_path ?? toolInput.path) as string | undefined; + return rawPath ? rawPath : null; +} + +function isLikelyDirectoryPath(virtualPath: string): boolean { + const normalized = virtualPath.replace(/\/+$/, "") || "/"; + if (normalized === "/") return true; + const base = normalized.split("/").pop() ?? 
""; + return !base.includes("."); +} + +export function getShellCommand(toolName: string, toolInput: Record): string | null { switch (toolName) { case "Grep": { const p = toolInput.path as string | undefined; @@ -98,53 +116,37 @@ function getShellCommand(toolName: string, toolInput: Record): break; } case "Read": { - const fp = toolInput.file_path as string | undefined; + const fp = getReadTargetPath(toolInput); if (fp && touchesMemory(fp)) { - const virtualPath = rewritePaths(fp) || "/"; - return `cat ${virtualPath}`; + const rewritten = rewritePaths(fp) || "/"; + return `${isLikelyDirectoryPath(rewritten) ? "ls" : "cat"} ${rewritten}`; } break; } case "Bash": { const cmd = toolInput.command as string | undefined; if (!cmd || !touchesMemory(cmd)) break; - { - const rewritten = rewritePaths(cmd); - if (!isSafe(rewritten)) { - log(`unsafe command blocked: ${rewritten}`); - return null; - } - return rewritten; + const rewritten = rewritePaths(cmd); + if (!isSafe(rewritten)) { + log(`unsafe command blocked: ${rewritten}`); + return null; } - break; + return rewritten; } case "Glob": { const p = toolInput.path as string | undefined; - if (p && touchesMemory(p)) { - return `ls /`; - } + if (p && touchesMemory(p)) return "ls /"; break; } } return null; } -// ── Output helper ──────────────────────────────────────────────────────────── - -function emitResult(command: string, description: string): void { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { command, description }, - }, - })); +export function buildAllowDecision(command: string, description: string): ClaudePreToolDecision { + return { command, description }; } -// ── Grep parameter extraction (Claude Code specific) ───────────────────────── - -/** Extract grep parameters from Grep tool input or Bash grep command. 
*/ -function extractGrepParams( +export function extractGrepParams( toolName: string, toolInput: Record, shellCmd: string, @@ -167,269 +169,283 @@ function extractGrepParams( return null; } +function buildFallbackDecision(shellCmd: string, shellBundle = SHELL_BUNDLE): ClaudePreToolDecision { + return buildAllowDecision( + `node "${shellBundle}" -c "${shellCmd.replace(/"/g, '\\"')}"`, + `[DeepLake shell] ${shellCmd}`, + ); +} -async function main(): Promise { - const input = await readStdin(); - log(`hook fired: tool=${input.tool_name} input=${JSON.stringify(input.tool_input)}`); +interface ClaudePreToolDeps { + config?: ReturnType; + createApi?: (table: string, config: NonNullable>) => DeeplakeApi; + executeCompiledBashCommandFn?: typeof executeCompiledBashCommand; + handleGrepDirectFn?: typeof handleGrepDirect; + readVirtualPathContentsFn?: typeof readVirtualPathContents; + readVirtualPathContentFn?: typeof readVirtualPathContent; + listVirtualPathRowsFn?: typeof listVirtualPathRows; + findVirtualPathsFn?: typeof findVirtualPaths; + readCachedIndexContentFn?: typeof readCachedIndexContent; + writeCachedIndexContentFn?: typeof writeCachedIndexContent; + writeReadCacheFileFn?: typeof writeReadCacheFile; + shellBundle?: string; + logFn?: (msg: string) => void; +} + +export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreToolDeps = {}): Promise { + const { + config = loadConfig(), + createApi = (table, activeConfig) => new DeeplakeApi( + activeConfig.token, + activeConfig.apiUrl, + activeConfig.orgId, + activeConfig.workspaceId, + table, + ), + executeCompiledBashCommandFn = executeCompiledBashCommand, + handleGrepDirectFn = handleGrepDirect, + readVirtualPathContentsFn = readVirtualPathContents, + readVirtualPathContentFn = readVirtualPathContent, + listVirtualPathRowsFn = listVirtualPathRows, + findVirtualPathsFn = findVirtualPaths, + readCachedIndexContentFn = readCachedIndexContent, + writeCachedIndexContentFn = writeCachedIndexContent, + 
writeReadCacheFileFn = writeReadCacheFile, + shellBundle = SHELL_BUNDLE, + logFn = log, + } = deps; const cmd = (input.tool_input.command as string) ?? ""; const shellCmd = getShellCommand(input.tool_name, input.tool_input); + const toolPath = (getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? "") as string; - // Also check non-Bash tools (Read/Write/Edit/Glob/Grep) that touch memory but didn't get a shellCmd - const toolPath = (input.tool_input.file_path ?? input.tool_input.path ?? "") as string; if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { - // Instead of denying (which triggers alarm loops in Claude Code), return - // an "allow" with guidance that tells the agent to retry with bash. - // Uses stdout so the agent sees it as output (not a fatal error), but - // prefixed with [RETRY] to signal it should try again differently. const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. " + "This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. " + "python, python3, node, and curl are NOT available. " + "You MUST rewrite your command using only the bash tools listed above and try again. " + "For example, to parse JSON use: cat file.json | jq '.key'. 
To count keys: cat file.json | jq 'keys | length'."; - log(`unsupported command, returning guidance: ${cmd}`); - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(guidance)}`, - description: "[DeepLake] unsupported command — rewrite using bash builtins", - }, - }, - })); - return; + logFn(`unsupported command, returning guidance: ${cmd}`); + return buildAllowDecision( + `echo ${JSON.stringify(guidance)}`, + "[DeepLake] unsupported command — rewrite using bash builtins", + ); } - if (!shellCmd) return; - - // ── Fast path: handle Read and Grep directly via SQL (no shell spawn) ── - const config = loadConfig(); - if (config) { - const table = process.env["HIVEMIND_TABLE"] ?? "memory"; - const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - - try { - // ── Grep (Grep tool or Bash grep) — single SQL query ── - const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); - if (grepParams) { - log(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); - const result = await handleGrepDirect(api, table, sessionsTable, grepParams); - if (result !== null) { - emitResult(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); - return; - } + if (!shellCmd) return null; + if (!config) return buildFallbackDecision(shellCmd, shellBundle); + + const table = process.env["HIVEMIND_TABLE"] ?? "memory"; + const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? 
"sessions"; + const api = createApi(table, config); + + const readVirtualPathContentsWithCache = async ( + cachePaths: string[], + ): Promise> => { + const uniquePaths = [...new Set(cachePaths)]; + const result = new Map(uniquePaths.map((path) => [path, null])); + const cachedIndex = uniquePaths.includes("/index.md") + ? readCachedIndexContentFn(input.session_id) + : null; + + const remainingPaths = cachedIndex === null + ? uniquePaths + : uniquePaths.filter((path) => path !== "/index.md"); + + if (cachedIndex !== null) { + result.set("/index.md", cachedIndex); + } + + if (remainingPaths.length > 0) { + const fetched = await readVirtualPathContentsFn(api, table, sessionsTable, remainingPaths); + for (const [path, content] of fetched) result.set(path, content); + } + + const fetchedIndex = result.get("/index.md"); + if (typeof fetchedIndex === "string") { + writeCachedIndexContentFn(input.session_id, fetchedIndex); + } + + return result; + }; + + try { + if (input.tool_name === "Bash") { + const compiled = await executeCompiledBashCommandFn(api, table, sessionsTable, shellCmd, { + readVirtualPathContentsFn: async (_api, _memoryTable, _sessionsTable, cachePaths) => readVirtualPathContentsWithCache(cachePaths), + }); + if (compiled !== null) { + return buildAllowDecision(`echo ${JSON.stringify(compiled)}`, `[DeepLake compiled] ${shellCmd}`); } + } - // ── Read file: Read tool, or Bash cat/head/tail ── - { - let virtualPath: string | null = null; - let lineLimit = 0; // 0 = all lines - let fromEnd = false; // true = tail + const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); + if (grepParams) { + logFn(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirectFn(api, table, sessionsTable, grepParams); + if (result !== null) return buildAllowDecision(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); + } - if (input.tool_name === "Read") { - 
virtualPath = rewritePaths((input.tool_input.file_path as string) ?? ""); - } else if (input.tool_name === "Bash") { - // cat [2>...] [| grep ... | head -N] or [| head -N] - // Strip stderr redirect (2>/dev/null, 2>&1, etc.) and optional grep -v pipe - const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); - const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); - if (catPipeHead) { virtualPath = catPipeHead[1]; lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); } - // cat - if (!virtualPath) { - const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); - if (catMatch) virtualPath = catMatch[1]; - } - // head [-n] N - if (!virtualPath) { - const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? - shellCmd.match(/^head\s+(\S+)\s*$/); - if (headMatch) { - if (headMatch[2]) { virtualPath = headMatch[2]; lineLimit = Math.abs(parseInt(headMatch[1], 10)); } - else { virtualPath = headMatch[1]; lineLimit = 10; } - } - } - // tail [-n] N - if (!virtualPath) { - const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? - shellCmd.match(/^tail\s+(\S+)\s*$/); - if (tailMatch) { - fromEnd = true; - if (tailMatch[2]) { virtualPath = tailMatch[2]; lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } - else { virtualPath = tailMatch[1]; lineLimit = 10; } - } - } - // wc -l - if (!virtualPath) { - const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); - if (wcMatch) { virtualPath = wcMatch[1]; lineLimit = -1; } // -1 = count mode - } + let virtualPath: string | null = null; + let lineLimit = 0; + let fromEnd = false; + let lsDir: string | null = null; + let longFormat = false; + + if (input.tool_name === "Read") { + virtualPath = rewritePaths(getReadTargetPath(input.tool_input) ?? 
""); + if (virtualPath && isLikelyDirectoryPath(virtualPath)) { + lsDir = virtualPath.replace(/\/+$/, "") || "/"; + virtualPath = null; + } + } else if (input.tool_name === "Bash") { + const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { virtualPath = catPipeHead[1]; lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? + shellCmd.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { virtualPath = headMatch[2]; lineLimit = Math.abs(parseInt(headMatch[1], 10)); } + else { virtualPath = headMatch[1]; lineLimit = 10; } } - - if (virtualPath && !virtualPath.endsWith("/")) { - log(`direct read: ${virtualPath}`); - let content: string | null = null; - - if (virtualPath.startsWith("/sessions/")) { - // Session files live in the sessions table — skip memory - try { - const sessionRows = await api.query( - `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (sessionRows.length > 0 && sessionRows[0]["content"]) { - content = sessionRows[0]["content"] as string; - } - } catch { /* fall through to shell */ } - } else { - // Memory table (summaries, notes, etc.) 
- const rows = await api.query( - `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (rows.length > 0 && rows[0]["summary"]) { - content = rows[0]["summary"] as string; - } else if (virtualPath === "/index.md") { - // Virtual index — generate from metadata - const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` - ); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"] as string; - const proj = r["project"] as string || ""; - const desc = (r["description"] as string || "").slice(0, 120); - const date = (r["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); - } - } - - if (content !== null) { - if (lineLimit === -1) { - const count = content.split("\n").length; - emitResult(`echo ${JSON.stringify(`${count} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); - return; - } - if (lineLimit > 0) { - const lines = content.split("\n"); - content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); - } - const label = lineLimit > 0 ? (fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}`) : "cat"; - emitResult(`echo ${JSON.stringify(content)}`, `[DeepLake direct] ${label} ${virtualPath}`); - return; - } + } + if (!virtualPath) { + const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
+ shellCmd.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { virtualPath = tailMatch[2]; lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } + else { virtualPath = tailMatch[1]; lineLimit = 10; } } } + if (!virtualPath) { + const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { virtualPath = wcMatch[1]; lineLimit = -1; } + } + } - // ── ls: Bash ls or Glob tool ── - { - let lsDir: string | null = null; - let longFormat = false; - - if (input.tool_name === "Glob") { - lsDir = rewritePaths((input.tool_input.path as string) ?? "") || "/"; - } else if (input.tool_name === "Bash") { - const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); - if (lsMatch) { - lsDir = lsMatch[2] ?? "/"; - longFormat = (lsMatch[1] ?? "").includes("l"); - } + if (virtualPath && !virtualPath.endsWith("/")) { + logFn(`direct read: ${virtualPath}`); + let content = virtualPath === "/index.md" + ? readCachedIndexContentFn(input.session_id) + : null; + + if (content === null) { + // `/index.md` goes through the dual-table builder inside + // `readVirtualPathContents` (fix #1). Other paths fall back to the + // same helper which returns null when neither table has a row, at + // which point we let the shell bundle handle the miss below. + content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); + } + if (content !== null) { + if (virtualPath === "/index.md") { + writeCachedIndexContentFn(input.session_id, content); } - - if (lsDir) { - const dir = lsDir.replace(/\/+$/, "") || "/"; - log(`direct ls: ${dir}`); - // Query the right table(s) based on path - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const isRoot = dir === "/"; - const lsQueries: Promise[]>[] = []; - if (!isSessionDir) { - lsQueries.push(api.query( - `SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? 
"" : dir)}/%' ORDER BY path` - ).catch(() => [])); - } - if (isSessionDir || isRoot) { - lsQueries.push(api.query( - `SELECT path, size_bytes FROM "${sessionsTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path` - ).catch(() => [])); - } - const rows = (await Promise.all(lsQueries)).flat(); - const entries = new Map(); - const prefix = dir === "/" ? "/" : dir + "/"; - for (const row of rows) { - const p = row["path"] as string; - if (!p.startsWith(prefix) && dir !== "/") continue; - const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); - const slash = rest.indexOf("/"); - const name = slash === -1 ? rest : rest.slice(0, slash); - if (!name) continue; - const existing = entries.get(name); - if (slash !== -1) { - if (!existing) entries.set(name, { isDir: true, size: 0 }); - } else { - entries.set(name, { isDir: false, size: (row["size_bytes"] as number) ?? 0 }); - } - } - const lines: string[] = []; - for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { - if (longFormat) { - const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; - const size = String(info.isDir ? 0 : info.size).padStart(6); - lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); - } else { - lines.push(name + (info.isDir ? "/" : "")); - } - } - emitResult(`echo ${JSON.stringify(lines.join("\n") || "(empty directory)")}`, `[DeepLake direct] ls ${dir}`); - return; + if (lineLimit === -1) return buildAllowDecision(`echo ${JSON.stringify(`${content.split("\n").length} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + const label = lineLimit > 0 ? (fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}`) : "cat"; + // Read tool writes content to disk and Claude Code reads the file directly, + // so no size pressure; keep full content. 
Bash intercepts flow through + // Claude Code's 16 KB tool_result threshold so we cap before reaching it. + if (input.tool_name === "Read") { + const file_path = writeReadCacheFileFn(input.session_id, virtualPath, content); + return buildReadDecision(file_path, `[DeepLake direct] ${label} ${virtualPath}`); } + const capped = capOutputForClaude(content, { kind: label }); + return buildAllowDecision(`echo ${JSON.stringify(capped)}`, `[DeepLake direct] ${label} ${virtualPath}`); } + } - // ── find -name '' ── - if (input.tool_name === "Bash") { - const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); - if (findMatch) { - const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); - log(`direct find: ${dir} -name '${findMatch[2]}'`); - const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); - const findTable = isSessionDir ? sessionsTable : table; - const rows = await api.query( - `SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path` - ); - let result = rows.map(r => r["path"] as string).join("\n") || ""; - // Handle piped wc -l - if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) { - result = String(rows.length); - } - emitResult(`echo ${JSON.stringify(result || "(no matches)")}`, `[DeepLake direct] find ${dir}`); - return; + if (!lsDir && input.tool_name === "Glob") { + lsDir = rewritePaths((input.tool_input.path as string) ?? "") || "/"; + } else if (input.tool_name === "Bash") { + const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); + if (lsMatch) { + lsDir = lsMatch[2] ?? "/"; + longFormat = (lsMatch[1] ?? 
"").includes("l"); + } + } + + if (lsDir) { + const dir = lsDir.replace(/\/+$/, "") || "/"; + logFn(`direct ls: ${dir}`); + const rows = await listVirtualPathRowsFn(api, table, sessionsTable, dir); + const entries = new Map(); + const prefix = dir === "/" ? "/" : dir + "/"; + for (const row of rows) { + const p = row["path"] as string; + if (!p.startsWith(prefix) && dir !== "/") continue; + const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? rest : rest.slice(0, slash); + if (!name) continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: (row["size_bytes"] as number) ?? 0 }); } } - } catch (e: any) { - log(`direct query failed, falling back to shell: ${e.message}`); + const lines: string[] = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? 
"/" : "")); + } + } + const lsOutput = capOutputForClaude(lines.join("\n") || "(empty directory)", { kind: "ls" }); + return buildAllowDecision(`echo ${JSON.stringify(lsOutput)}`, `[DeepLake direct] ls ${dir}`); } - } - // ── Slow path: rewrite to virtual shell (for Bash, Glob, or when direct fails) ── - log(`intercepted → rewriting to shell: ${shellCmd}`); + if (input.tool_name === "Bash") { + const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + logFn(`direct find: ${dir} -name '${findMatch[2]}'`); + const paths = await findVirtualPathsFn(api, table, sessionsTable, dir, namePattern); + let result = paths.join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) result = String(paths.length); + const capped = capOutputForClaude(result || "(no matches)", { kind: "find" }); + return buildAllowDecision(`echo ${JSON.stringify(capped)}`, `[DeepLake direct] find ${dir}`); + } + } + } catch (e: any) { + logFn(`direct query failed, falling back to shell: ${e.message}`); + } - const rewrittenCommand = `node "${SHELL_BUNDLE}" -c "${shellCmd.replace(/"/g, '\\"')}"`; + return buildFallbackDecision(shellCmd, shellBundle); +} - const output: Record = { +/* c8 ignore start */ +async function main(): Promise { + const input = await readStdin(); + const decision = await processPreToolUse(input); + if (!decision) return; + const updatedInput: Record = decision.file_path !== undefined + ? 
{ file_path: decision.file_path } + : { command: decision.command, description: decision.description }; + console.log(JSON.stringify({ hookSpecificOutput: { hookEventName: "PreToolUse", permissionDecision: "allow", - updatedInput: { - command: rewrittenCommand, - description: `[DeepLake] ${shellCmd}`, - }, + updatedInput, }, - }; - - log(`rewritten: ${rewrittenCommand}`); - console.log(JSON.stringify(output)); + })); } -main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +if (isDirectRun(import.meta.url)) { + main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); +} +/* c8 ignore stop */ diff --git a/src/hooks/query-cache.ts b/src/hooks/query-cache.ts new file mode 100644 index 0000000..aee72e4 --- /dev/null +++ b/src/hooks/query-cache.ts @@ -0,0 +1,49 @@ +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { homedir } from "node:os"; +import { log as _log } from "../utils/debug.js"; + +const log = (msg: string) => _log("query-cache", msg); +const DEFAULT_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); +const INDEX_CACHE_FILE = "index.md"; + +interface QueryCacheDeps { + cacheRoot?: string; + logFn?: (msg: string) => void; +} + +export function getSessionQueryCacheDir(sessionId: string, deps: QueryCacheDeps = {}): string { + const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; + return join(cacheRoot, sessionId); +} + +export function clearSessionQueryCache(sessionId: string, deps: QueryCacheDeps = {}): void { + const { logFn = log } = deps; + try { + rmSync(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e: any) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } +} + +export function readCachedIndexContent(sessionId: string, deps: QueryCacheDeps = {}): string | null { + const { logFn = log } = deps; + try { + return readFileSync(join(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), 
"utf-8"); + } catch (e: any) { + if (e?.code === "ENOENT") return null; + logFn(`read failed for session=${sessionId}: ${e.message}`); + return null; + } +} + +export function writeCachedIndexContent(sessionId: string, content: string, deps: QueryCacheDeps = {}): void { + const { logFn = log } = deps; + try { + const dir = getSessionQueryCacheDir(sessionId, deps); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, INDEX_CACHE_FILE), content, "utf-8"); + } catch (e: any) { + logFn(`write failed for session=${sessionId}: ${e.message}`); + } +} diff --git a/src/hooks/session-queue.ts b/src/hooks/session-queue.ts new file mode 100644 index 0000000..1157a44 --- /dev/null +++ b/src/hooks/session-queue.ts @@ -0,0 +1,477 @@ +import { + appendFileSync, + closeSync, + existsSync, + mkdirSync, + openSync, + readFileSync, + readdirSync, + renameSync, + rmSync, + statSync, + writeFileSync, +} from "node:fs"; +import { dirname, join } from "node:path"; +import { homedir } from "node:os"; +import { sqlIdent, sqlStr } from "../utils/sql.js"; + +export interface SessionQueueApi { + query(sql: string): Promise[]>; + ensureSessionsTable(name?: string): Promise; +} + +export interface QueuedSessionRow { + id: string; + path: string; + filename: string; + message: string; + author: string; + sizeBytes: number; + project: string; + description: string; + agent: string; + creationDate: string; + lastUpdateDate: string; +} + +export interface FlushSessionQueueOptions { + sessionId: string; + sessionsTable: string; + queueDir?: string; + maxBatchRows?: number; + allowStaleInflight?: boolean; + staleInflightMs?: number; + waitIfBusyMs?: number; + drainAll?: boolean; +} + +export interface FlushSessionQueueResult { + status: "empty" | "busy" | "flushed" | "disabled"; + rows: number; + batches: number; +} + +export interface DrainSessionQueueOptions { + sessionsTable: string; + queueDir?: string; + maxBatchRows?: number; + staleInflightMs?: number; +} + +export interface 
DrainSessionQueueResult { + queuedSessions: number; + flushedSessions: number; + rows: number; + batches: number; +} + +const DEFAULT_QUEUE_DIR = join(homedir(), ".deeplake", "queue"); +const DEFAULT_MAX_BATCH_ROWS = 50; +const DEFAULT_STALE_INFLIGHT_MS = 60_000; +const DEFAULT_AUTH_FAILURE_TTL_MS = 5 * 60_000; +const DEFAULT_DRAIN_LOCK_STALE_MS = 30_000; +const BUSY_WAIT_STEP_MS = 100; + +interface SessionWriteDisabledState { + disabledAt: string; + reason: string; + sessionsTable: string; +} + +class SessionWriteDisabledError extends Error { + constructor(message: string) { + super(message); + this.name = "SessionWriteDisabledError"; + } +} + +export function buildSessionPath(config: { userName: string; orgName: string; workspaceId: string }, sessionId: string): string { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} + +export function buildQueuedSessionRow(args: { + sessionPath: string; + line: string; + userName: string; + projectName: string; + description: string; + agent: string; + timestamp: string; +}): QueuedSessionRow { + return { + id: crypto.randomUUID(), + path: args.sessionPath, + filename: args.sessionPath.split("/").pop() ?? 
"", + message: args.line, + author: args.userName, + sizeBytes: Buffer.byteLength(args.line, "utf-8"), + project: args.projectName, + description: args.description, + agent: args.agent, + creationDate: args.timestamp, + lastUpdateDate: args.timestamp, + }; +} + +export function appendQueuedSessionRow(row: QueuedSessionRow, queueDir = DEFAULT_QUEUE_DIR): string { + mkdirSync(queueDir, { recursive: true }); + const sessionId = extractSessionId(row.path); + const queuePath = getQueuePath(queueDir, sessionId); + appendFileSync(queuePath, `${JSON.stringify(row)}\n`); + return queuePath; +} + +export function buildSessionInsertSql(sessionsTable: string, rows: QueuedSessionRow[]): string { + if (rows.length === 0) throw new Error("buildSessionInsertSql: rows must not be empty"); + const table = sqlIdent(sessionsTable); + const values = rows.map((row) => { + const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); + return ( + `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, ` + + `'${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', ` + + `'${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')` + ); + }).join(", "); + + return ( + `INSERT INTO "${table}" ` + + `(id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${values}` + ); +} + +function coerceJsonbPayload(message: string): string { + try { + return JSON.stringify(JSON.parse(message)); + } catch { + return JSON.stringify({ + type: "raw_message", + content: message, + }); + } +} + +export async function flushSessionQueue(api: SessionQueueApi, opts: FlushSessionQueueOptions): Promise { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; + const staleInflightMs = opts.staleInflightMs ?? 
DEFAULT_STALE_INFLIGHT_MS; + const waitIfBusyMs = opts.waitIfBusyMs ?? 0; + const drainAll = opts.drainAll ?? false; + + mkdirSync(queueDir, { recursive: true }); + + const queuePath = getQueuePath(queueDir, opts.sessionId); + const inflightPath = getInflightPath(queueDir, opts.sessionId); + if (isSessionWriteDisabled(opts.sessionsTable, queueDir)) { + return existsSync(queuePath) || existsSync(inflightPath) + ? { status: "disabled", rows: 0, batches: 0 } + : { status: "empty", rows: 0, batches: 0 }; + } + let totalRows = 0; + let totalBatches = 0; + let flushedAny = false; + + while (true) { + if (opts.allowStaleInflight) recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + + if (existsSync(inflightPath)) { + if (waitIfBusyMs > 0) { + await waitForInflightToClear(inflightPath, waitIfBusyMs); + if (opts.allowStaleInflight) recoverStaleInflight(queuePath, inflightPath, staleInflightMs); + } + if (existsSync(inflightPath)) { + return flushedAny + ? { status: "flushed", rows: totalRows, batches: totalBatches } + : { status: "busy", rows: 0, batches: 0 }; + } + } + + if (!existsSync(queuePath)) { + return flushedAny + ? { status: "flushed", rows: totalRows, batches: totalBatches } + : { status: "empty", rows: 0, batches: 0 }; + } + + try { + renameSync(queuePath, inflightPath); + } catch (e: any) { + if (e?.code === "ENOENT") { + return flushedAny + ? 
{ status: "flushed", rows: totalRows, batches: totalBatches } + : { status: "empty", rows: 0, batches: 0 }; + } + throw e; + } + + try { + const { rows, batches } = await flushInflightFile(api, opts.sessionsTable, inflightPath, maxBatchRows); + totalRows += rows; + totalBatches += batches; + flushedAny = flushedAny || rows > 0; + } catch (e) { + requeueInflight(queuePath, inflightPath); + if (e instanceof SessionWriteDisabledError) { + return { status: "disabled", rows: totalRows, batches: totalBatches }; + } + throw e; + } + + if (!drainAll) { + return { status: "flushed", rows: totalRows, batches: totalBatches }; + } + } +} + +export async function drainSessionQueues(api: SessionQueueApi, opts: DrainSessionQueueOptions): Promise { + const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; + mkdirSync(queueDir, { recursive: true }); + + const sessionIds = listQueuedSessionIds(queueDir, opts.staleInflightMs ?? DEFAULT_STALE_INFLIGHT_MS); + let flushedSessions = 0; + let rows = 0; + let batches = 0; + + for (const sessionId of sessionIds) { + const result = await flushSessionQueue(api, { + sessionId, + sessionsTable: opts.sessionsTable, + queueDir, + maxBatchRows: opts.maxBatchRows, + allowStaleInflight: true, + staleInflightMs: opts.staleInflightMs, + drainAll: true, + }); + if (result.status === "flushed") { + flushedSessions += 1; + rows += result.rows; + batches += result.batches; + } + } + + return { + queuedSessions: sessionIds.length, + flushedSessions, + rows, + batches, + }; +} + +export function tryAcquireSessionDrainLock( + sessionsTable: string, + queueDir = DEFAULT_QUEUE_DIR, + staleMs = DEFAULT_DRAIN_LOCK_STALE_MS, +): (() => void) | null { + mkdirSync(queueDir, { recursive: true }); + const lockPath = getSessionDrainLockPath(queueDir, sessionsTable); + + for (let attempt = 0; attempt < 2; attempt++) { + try { + const fd = openSync(lockPath, "wx"); + closeSync(fd); + return () => rmSync(lockPath, { force: true }); + } catch (e: any) { + if (e?.code !== 
"EEXIST") throw e; + if (existsSync(lockPath) && isStale(lockPath, staleMs)) { + rmSync(lockPath, { force: true }); + continue; + } + return null; + } + } + + return null; +} + +function getQueuePath(queueDir: string, sessionId: string): string { + return join(queueDir, `${sessionId}.jsonl`); +} + +function getInflightPath(queueDir: string, sessionId: string): string { + return join(queueDir, `${sessionId}.inflight`); +} + +function extractSessionId(sessionPath: string): string { + const filename = sessionPath.split("/").pop() ?? ""; + return filename.replace(/\.jsonl$/, "").split("_").pop() ?? filename; +} + +async function flushInflightFile( + api: SessionQueueApi, + sessionsTable: string, + inflightPath: string, + maxBatchRows: number, +): Promise<{ rows: number; batches: number }> { + const rows = readQueuedRows(inflightPath); + if (rows.length === 0) { + rmSync(inflightPath, { force: true }); + return { rows: 0, batches: 0 }; + } + + let ensured = false; + let batches = 0; + const queueDir = dirname(inflightPath); + for (let i = 0; i < rows.length; i += maxBatchRows) { + const chunk = rows.slice(i, i + maxBatchRows); + const sql = buildSessionInsertSql(sessionsTable, chunk); + try { + await api.query(sql); + } catch (e: any) { + if (isSessionWriteAuthError(e)) { + markSessionWriteDisabled(sessionsTable, errorMessage(e), queueDir); + throw new SessionWriteDisabledError(errorMessage(e)); + } + if (!ensured && isEnsureSessionsTableRetryable(e)) { + try { + await api.ensureSessionsTable(sessionsTable); + } catch (ensureError: unknown) { + if (isSessionWriteAuthError(ensureError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(ensureError), queueDir); + throw new SessionWriteDisabledError(errorMessage(ensureError)); + } + throw ensureError; + } + ensured = true; + try { + await api.query(sql); + } catch (retryError: unknown) { + if (isSessionWriteAuthError(retryError)) { + markSessionWriteDisabled(sessionsTable, errorMessage(retryError), queueDir); + 
throw new SessionWriteDisabledError(errorMessage(retryError)); + } + throw retryError; + } + } else { + throw e; + } + } + batches += 1; + } + + clearSessionWriteDisabled(sessionsTable, queueDir); + rmSync(inflightPath, { force: true }); + return { rows: rows.length, batches }; +} + +function readQueuedRows(path: string): QueuedSessionRow[] { + const raw = readFileSync(path, "utf-8"); + return raw + .split("\n") + .map(line => line.trim()) + .filter(Boolean) + .map((line) => JSON.parse(line) as QueuedSessionRow); +} + +function requeueInflight(queuePath: string, inflightPath: string): void { + if (!existsSync(inflightPath)) return; + const inflight = readFileSync(inflightPath, "utf-8"); + appendFileSync(queuePath, inflight); + rmSync(inflightPath, { force: true }); +} + +function recoverStaleInflight(queuePath: string, inflightPath: string, staleInflightMs: number): void { + if (!existsSync(inflightPath) || !isStale(inflightPath, staleInflightMs)) return; + requeueInflight(queuePath, inflightPath); +} + +function isStale(path: string, staleInflightMs: number): boolean { + return Date.now() - statSync(path).mtimeMs >= staleInflightMs; +} + +function listQueuedSessionIds(queueDir: string, staleInflightMs: number): string[] { + const sessionIds = new Set(); + for (const name of readdirSync(queueDir)) { + if (name.endsWith(".jsonl")) { + sessionIds.add(name.slice(0, -".jsonl".length)); + } else if (name.endsWith(".inflight")) { + const path = join(queueDir, name); + if (isStale(path, staleInflightMs)) { + sessionIds.add(name.slice(0, -".inflight".length)); + } + } + } + return [...sessionIds].sort(); +} + +function isEnsureSessionsTableRetryable(error: unknown): boolean { + const message = errorMessage(error).toLowerCase(); + return message.includes("does not exist") || + message.includes("doesn't exist") || + message.includes("relation") || + message.includes("not found"); +} + +export function isSessionWriteAuthError(error: unknown): boolean { + const message = 
errorMessage(error).toLowerCase(); + return message.includes("403") || + message.includes("401") || + message.includes("forbidden") || + message.includes("unauthorized"); +} + +export function markSessionWriteDisabled( + sessionsTable: string, + reason: string, + queueDir = DEFAULT_QUEUE_DIR, +): void { + mkdirSync(queueDir, { recursive: true }); + writeFileSync( + getSessionWriteDisabledPath(queueDir, sessionsTable), + JSON.stringify({ + disabledAt: new Date().toISOString(), + reason, + sessionsTable, + } satisfies SessionWriteDisabledState), + ); +} + +export function clearSessionWriteDisabled( + sessionsTable: string, + queueDir = DEFAULT_QUEUE_DIR, +): void { + rmSync(getSessionWriteDisabledPath(queueDir, sessionsTable), { force: true }); +} + +export function isSessionWriteDisabled( + sessionsTable: string, + queueDir = DEFAULT_QUEUE_DIR, + ttlMs = DEFAULT_AUTH_FAILURE_TTL_MS, +): boolean { + const path = getSessionWriteDisabledPath(queueDir, sessionsTable); + if (!existsSync(path)) return false; + try { + const raw = readFileSync(path, "utf-8"); + const state = JSON.parse(raw) as SessionWriteDisabledState; + const ageMs = Date.now() - new Date(state.disabledAt).getTime(); + if (Number.isNaN(ageMs) || ageMs >= ttlMs) { + rmSync(path, { force: true }); + return false; + } + return true; + } catch { + rmSync(path, { force: true }); + return false; + } +} + +function getSessionWriteDisabledPath(queueDir: string, sessionsTable: string): string { + return join(queueDir, `.${sessionsTable}.disabled.json`); +} + +function getSessionDrainLockPath(queueDir: string, sessionsTable: string): string { + return join(queueDir, `.${sessionsTable}.drain.lock`); +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error);
+}
+
+async function waitForInflightToClear(inflightPath: string, waitIfBusyMs: number): Promise<void> {
+  const startedAt = Date.now();
+  while (existsSync(inflightPath) && (Date.now() - startedAt) < waitIfBusyMs) {
+    await sleep(BUSY_WAIT_STEP_MS);
+  }
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
diff --git a/src/hooks/version-check.ts b/src/hooks/version-check.ts
new file mode 100644
index 0000000..fd0fa0f
--- /dev/null
+++ b/src/hooks/version-check.ts
@@ -0,0 +1,110 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { homedir } from "node:os";
+
+export const DEFAULT_VERSION_CACHE_PATH = join(homedir(), ".deeplake", ".version-check.json");
+export const DEFAULT_VERSION_CACHE_TTL_MS = 60 * 60 * 1000;
+
+export interface VersionCacheEntry {
+  checkedAt: number;
+  latest: string | null;
+  url: string;
+}
+
+export function getInstalledVersion(bundleDir: string, pluginManifestDir: ".claude-plugin" | ".codex-plugin"): string | null {
+  try {
+    const pluginJson = join(bundleDir, "..", pluginManifestDir, "plugin.json");
+    const plugin = JSON.parse(readFileSync(pluginJson, "utf-8"));
+    if (plugin.version) return plugin.version;
+  } catch { /* fall through */ }
+
+  let dir = bundleDir;
+  for (let i = 0; i < 5; i++) {
+    const candidate = join(dir, "package.json");
+    try {
+      const pkg = JSON.parse(readFileSync(candidate, "utf-8"));
+      if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version;
+    } catch { /* not here */ }
+    const parent = dirname(dir);
+    if (parent === dir) break;
+    dir = parent;
+  }
+  return null;
+}
+
+export function isNewer(latest: string, current: string): boolean {
+  const parse = (v: string) => v.replace(/-.*$/, "").split(".").map(Number);
+  const [la, lb, lc] = parse(latest);
+  const [ca, cb, cc] = parse(current);
+  return la > ca || (la === ca 
&& lb > cb) || (la === ca && lb === cb && lc > cc);
+}
+
+export function readVersionCache(cachePath = DEFAULT_VERSION_CACHE_PATH): VersionCacheEntry | null {
+  if (!existsSync(cachePath)) return null;
+  try {
+    const parsed = JSON.parse(readFileSync(cachePath, "utf-8"));
+    if (
+      parsed
+      && typeof parsed.checkedAt === "number"
+      && typeof parsed.url === "string"
+      && (typeof parsed.latest === "string" || parsed.latest === null)
+    ) {
+      return parsed as VersionCacheEntry;
+    }
+  } catch { /* ignore */ }
+  return null;
+}
+
+export function writeVersionCache(entry: VersionCacheEntry, cachePath = DEFAULT_VERSION_CACHE_PATH): void {
+  mkdirSync(dirname(cachePath), { recursive: true });
+  writeFileSync(cachePath, JSON.stringify(entry));
+}
+
+export function readFreshCachedLatestVersion(
+  url: string,
+  ttlMs = DEFAULT_VERSION_CACHE_TTL_MS,
+  cachePath = DEFAULT_VERSION_CACHE_PATH,
+  nowMs = Date.now(),
+): string | null | undefined {
+  const cached = readVersionCache(cachePath);
+  if (!cached || cached.url !== url) return undefined;
+  if ((nowMs - cached.checkedAt) > ttlMs) return undefined;
+  return cached.latest;
+}
+
+export async function getLatestVersionCached(opts: {
+  url: string;
+  timeoutMs: number;
+  ttlMs?: number;
+  cachePath?: string;
+  nowMs?: number;
+  fetchImpl?: typeof fetch;
+}): Promise<string | null> {
+  const ttlMs = opts.ttlMs ?? DEFAULT_VERSION_CACHE_TTL_MS;
+  const cachePath = opts.cachePath ?? DEFAULT_VERSION_CACHE_PATH;
+  const nowMs = opts.nowMs ?? Date.now();
+  const fetchImpl = opts.fetchImpl ?? fetch;
+
+  const fresh = readFreshCachedLatestVersion(opts.url, ttlMs, cachePath, nowMs);
+  if (fresh !== undefined) return fresh;
+
+  const stale = readVersionCache(cachePath);
+  try {
+    const res = await fetchImpl(opts.url, { signal: AbortSignal.timeout(opts.timeoutMs) });
+    const latest = res.ok ? (await res.json() as { version?: string }).version ?? null : (stale?.latest ?? 
null);
+    writeVersionCache({
+      checkedAt: nowMs,
+      latest,
+      url: opts.url,
+    }, cachePath);
+    return latest;
+  } catch {
+    const latest = stale?.latest ?? null;
+    writeVersionCache({
+      checkedAt: nowMs,
+      latest,
+      url: opts.url,
+    }, cachePath);
+    return latest;
+  }
+}
diff --git a/src/hooks/virtual-table-query.ts b/src/hooks/virtual-table-query.ts
new file mode 100644
index 0000000..a430a35
--- /dev/null
+++ b/src/hooks/virtual-table-query.ts
@@ -0,0 +1,220 @@
+import type { DeeplakeApi } from "../deeplake-api.js";
+import { sqlLike, sqlStr } from "../utils/sql.js";
+import { normalizeContent } from "../shell/grep-core.js";
+
+type Row = Record<string, unknown>;
+
+function normalizeSessionPart(path: string, content: string): string {
+  return normalizeContent(path, content);
+}
+
+export function buildVirtualIndexContent(summaryRows: Row[], sessionRows: Row[] = []): string {
+  const total = summaryRows.length + sessionRows.length;
+  const lines = [
+    "# Memory Index",
+    "",
+    `${total} entries (${summaryRows.length} summaries, ${sessionRows.length} sessions):`,
+    "",
+  ];
+  if (summaryRows.length > 0) {
+    lines.push("## Summaries", "");
+    for (const row of summaryRows) {
+      const path = row["path"] as string;
+      const project = row["project"] as string || "";
+      const description = (row["description"] as string || "").slice(0, 120);
+      const date = (row["creation_date"] as string || "").slice(0, 10);
+      lines.push(`- [${path}](${path}) ${date} ${project ? 
`[${project}]` : ""} ${description}`);
+    }
+    lines.push("");
+  }
+  if (sessionRows.length > 0) {
+    lines.push("## Sessions", "");
+    for (const row of sessionRows) {
+      const path = row["path"] as string;
+      const description = (row["description"] as string || "").slice(0, 120);
+      lines.push(`- [${path}](${path}) ${description}`);
+    }
+  }
+  return lines.join("\n");
+}
+
+function buildUnionQuery(memoryQuery: string, sessionsQuery: string): string {
+  return (
+    `SELECT path, content, size_bytes, creation_date, source_order FROM (` +
+    `(${memoryQuery}) UNION ALL (${sessionsQuery})` +
+    `) AS combined ORDER BY path, source_order, creation_date`
+  );
+}
+
+function buildInList(paths: string[]): string {
+  return paths.map(path => `'${sqlStr(path)}'`).join(", ");
+}
+
+function buildDirFilter(dirs: string[]): string {
+  const cleaned = [...new Set(dirs.map(dir => dir.replace(/\/+$/, "") || "/"))];
+  if (cleaned.length === 0 || cleaned.includes("/")) return "";
+  const clauses = cleaned.map((dir) => `path LIKE '${sqlLike(dir)}/%' ESCAPE '\\'`);
+  return ` WHERE ${clauses.join(" OR ")}`;
+}
+
+async function queryUnionRows(
+  api: DeeplakeApi,
+  memoryQuery: string,
+  sessionsQuery: string,
+): Promise<Row[]> {
+  const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery);
+  try {
+    return await api.query(unionQuery);
+  } catch {
+    const [memoryRows, sessionRows] = await Promise.all([
+      api.query(memoryQuery).catch(() => []),
+      api.query(sessionsQuery).catch(() => []),
+    ]);
+    return [...memoryRows, ...sessionRows];
+  }
+}
+
+export async function readVirtualPathContents(
+  api: DeeplakeApi,
+  memoryTable: string,
+  sessionsTable: string,
+  virtualPaths: string[],
+): Promise<Map<string, string | null>> {
+  const uniquePaths = [...new Set(virtualPaths)];
+  const result = new Map<string, string | null>(uniquePaths.map(path => [path, null]));
+  if (uniquePaths.length === 0) return result;
+
+  const inList = buildInList(uniquePaths);
+  const rows = await queryUnionRows(
+    api,
+    `SELECT path, summary::text AS content, 
NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, + `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`, + ); + + const memoryHits = new Map(); + const sessionHits = new Map(); + for (const row of rows) { + const path = row["path"]; + const content = row["content"]; + const sourceOrder = Number(row["source_order"] ?? 0); + if (typeof path !== "string" || typeof content !== "string") continue; + if (sourceOrder === 0) { + memoryHits.set(path, content); + } else { + const current = sessionHits.get(path) ?? []; + current.push(normalizeSessionPart(path, content)); + sessionHits.set(path, current); + } + } + + for (const path of uniquePaths) { + if (memoryHits.has(path)) { + result.set(path, memoryHits.get(path) ?? null); + continue; + } + const sessionParts = sessionHits.get(path) ?? []; + if (sessionParts.length > 0) { + result.set(path, sessionParts.join("\n")); + } + } + + if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const [summaryRows, sessionRows] = await Promise.all([ + api.query( + `SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + ).catch(() => [] as Row[]), + api.query( + `SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path` + ).catch(() => [] as Row[]), + ]); + result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows)); + } + + return result; +} + +export async function listVirtualPathRowsForDirs( + api: DeeplakeApi, + memoryTable: string, + sessionsTable: string, + dirs: string[], +): Promise> { + const uniqueDirs = [...new Set(dirs.map(dir => dir.replace(/\/+$/, "") || "/"))]; + const filter = buildDirFilter(uniqueDirs); + const rows = await queryUnionRows( + api, + `SELECT path, 
NULL::text AS content, size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}"${filter}`,
+    `SELECT path, NULL::text AS content, size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}"${filter}`,
+  );
+
+  const deduped = dedupeRowsByPath(rows.map((row) => ({
+    path: row["path"],
+    size_bytes: row["size_bytes"],
+  })));
+
+  const byDir = new Map<string, Row[]>();
+  for (const dir of uniqueDirs) byDir.set(dir, []);
+  for (const row of deduped) {
+    const path = row["path"];
+    if (typeof path !== "string") continue;
+    for (const dir of uniqueDirs) {
+      const prefix = dir === "/" ? "/" : `${dir}/`;
+      if (dir === "/" || path.startsWith(prefix)) {
+        byDir.get(dir)?.push(row);
+      }
+    }
+  }
+  return byDir;
+}
+
+export async function readVirtualPathContent(
+  api: DeeplakeApi,
+  memoryTable: string,
+  sessionsTable: string,
+  virtualPath: string,
+): Promise<string | null> {
+  return (await readVirtualPathContents(api, memoryTable, sessionsTable, [virtualPath])).get(virtualPath) ?? null;
+}
+
+export async function listVirtualPathRows(
+  api: DeeplakeApi,
+  memoryTable: string,
+  sessionsTable: string,
+  dir: string,
+): Promise<Row[]> {
+  return (await listVirtualPathRowsForDirs(api, memoryTable, sessionsTable, [dir])).get(dir.replace(/\/+$/, "") || "/") ?? [];
+}
+
+export async function findVirtualPaths(
+  api: DeeplakeApi,
+  memoryTable: string,
+  sessionsTable: string,
+  dir: string,
+  filenamePattern: string,
+): Promise<string[]> {
+  const normalizedDir = dir.replace(/\/+$/, "") || "/";
+  const likePath = `${sqlLike(normalizedDir === "/" ? 
"" : normalizedDir)}/%`; + const rows = await queryUnionRows( + api, + `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, + `SELECT path, NULL::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path LIKE '${likePath}' ESCAPE '\\' AND filename LIKE '${filenamePattern}' ESCAPE '\\'`, + ); + + return [...new Set( + rows + .map(row => row["path"]) + .filter((value): value is string => typeof value === "string" && value.length > 0), + )]; +} + +function dedupeRowsByPath(rows: Row[]): Row[] { + const seen = new Set(); + const unique: Row[] = []; + for (const row of rows) { + const path = typeof row["path"] === "string" ? row["path"] : ""; + if (!path || seen.has(path)) continue; + seen.add(path); + unique.push(row); + } + return unique; +} diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index a9cd895..8db0716 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -5,6 +5,7 @@ import type { IFileSystem, FsStat, MkdirOptions, RmOptions, CpOptions, FileContent, BufferEncoding, } from "just-bash"; +import { normalizeContent } from "./grep-core.js"; interface ReadFileOptions { encoding?: BufferEncoding } interface WriteFileOptions { encoding?: BufferEncoding } @@ -12,6 +13,7 @@ interface DirentEntry { name: string; isFile: boolean; isDirectory: boolean; isS // ── constants ───────────────────────────────────────────────────────────────── const BATCH_SIZE = 10; +const PREFETCH_BATCH_SIZE = 50; const FLUSH_DEBOUNCE_MS = 200; // ── helpers ─────────────────────────────────────────────────────────────────── @@ -38,6 +40,15 @@ export function guessMime(filename: string): string { ); } +function normalizeSessionMessage(path: string, message: unknown): string { + const raw = typeof message === "string" ? 
message : JSON.stringify(message); + return normalizeContent(path, raw); +} + +function joinSessionMessages(path: string, messages: unknown[]): string { + return messages.map((message) => normalizeSessionMessage(path, message)).join("\n"); +} + function fsErr(code: string, msg: string, path: string): Error { return Object.assign(new Error(`${code}: ${msg}, '${path}'`), { code }); } @@ -243,7 +254,7 @@ export class DeeplakeFs implements IFileSystem { // Build a lookup: key → session path from sessionPaths // Supports two formats: // 1. /sessions//___.jsonl → key = sessionId - // 2. /sessions/.json (e.g. conv_0_session_1.json) → key = filename stem + // 2. /sessions//.json or .jsonl → key = filename stem const sessionPathsByKey = new Map(); for (const sp of this.sessionPaths) { const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); @@ -296,24 +307,50 @@ export class DeeplakeFs implements IFileSystem { */ async prefetch(paths: string[]): Promise { const uncached: string[] = []; + const uncachedSessions: string[] = []; for (const raw of paths) { const p = normPath(raw); if (this.files.get(p) !== null && this.files.get(p) !== undefined) continue; if (this.pending.has(p)) continue; - if (this.sessionPaths.has(p)) continue; if (!this.files.has(p)) continue; // unknown path - uncached.push(p); + if (this.sessionPaths.has(p)) { + uncachedSessions.push(p); + } else { + uncached.push(p); + } } - if (uncached.length === 0) return; - const inList = uncached.map(p => `'${esc(p)}'`).join(", "); - const rows = await this.client.query( - `SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})` - ); - for (const row of rows) { - const p = row["path"] as string; - const text = (row["summary"] as string) ?? 
""; - this.files.set(p, Buffer.from(text, "utf-8")); + for (let i = 0; i < uncached.length; i += PREFETCH_BATCH_SIZE) { + const chunk = uncached.slice(i, i + PREFETCH_BATCH_SIZE); + const inList = chunk.map(p => `'${esc(p)}'`).join(", "); + const rows = await this.client.query( + `SELECT path, summary FROM "${this.table}" WHERE path IN (${inList})` + ); + for (const row of rows) { + const p = row["path"] as string; + const text = (row["summary"] as string) ?? ""; + this.files.set(p, Buffer.from(text, "utf-8")); + } + } + + if (!this.sessionsTable) return; + + for (let i = 0; i < uncachedSessions.length; i += PREFETCH_BATCH_SIZE) { + const chunk = uncachedSessions.slice(i, i + PREFETCH_BATCH_SIZE); + const inList = chunk.map(p => `'${esc(p)}'`).join(", "); + const rows = await this.client.query( + `SELECT path, message, creation_date FROM "${this.sessionsTable}" WHERE path IN (${inList}) ORDER BY path, creation_date ASC` + ); + const grouped = new Map(); + for (const row of rows) { + const p = row["path"] as string; + const current = grouped.get(p) ?? []; + current.push(normalizeSessionMessage(p, row["message"])); + grouped.set(p, current); + } + for (const [p, parts] of grouped) { + this.files.set(p, Buffer.from(parts.join("\n"), "utf-8")); + } } } @@ -338,7 +375,7 @@ export class DeeplakeFs implements IFileSystem { `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); - const text = rows.map(r => typeof r["message"] === "string" ? 
r["message"] : JSON.stringify(r["message"])).join("\n"); + const text = joinSessionMessages(p, rows.map((row) => row["message"])); const buf = Buffer.from(text, "utf-8"); this.files.set(p, buf); return buf; @@ -390,7 +427,7 @@ export class DeeplakeFs implements IFileSystem { `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); - const text = rows.map(r => typeof r["message"] === "string" ? r["message"] : JSON.stringify(r["message"])).join("\n"); + const text = joinSessionMessages(p, rows.map((row) => row["message"])); const buf = Buffer.from(text, "utf-8"); this.files.set(p, buf); return text; diff --git a/src/shell/deeplake-shell.ts b/src/shell/deeplake-shell.ts index dcdbfa5..e58dfb8 100644 --- a/src/shell/deeplake-shell.ts +++ b/src/shell/deeplake-shell.ts @@ -29,6 +29,20 @@ import { DeeplakeFs } from "./deeplake-fs.js"; import { createGrepCommand } from "./grep-interceptor.js"; async function main(): Promise { + const isOneShot = process.argv.includes("-c"); + + // One-shot mode is what the pre-tool-use hook invokes via `node shell-bundle -c "..."` + // to execute compound bash commands. Claude Code's Bash tool merges the child's + // stderr into the tool_result string Claude sees, so any `[deeplake-sql]` trace + // written to stderr here pollutes the model's view of the command output. + // Silence trace env vars regardless of how the caller set them. + if (isOneShot) { + delete process.env["HIVEMIND_TRACE_SQL"]; + delete process.env["DEEPLAKE_TRACE_SQL"]; + delete process.env["HIVEMIND_DEBUG"]; + delete process.env["DEEPLAKE_DEBUG"]; + } + const config = loadConfig(); if (!config) { process.stderr.write( @@ -42,8 +56,6 @@ async function main(): Promise { const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; const mount = process.env["HIVEMIND_MOUNT"] ?? 
"/"; - const isOneShot = process.argv.includes("-c"); - const client = new DeeplakeApi( config.token, config.apiUrl, config.orgId, config.workspaceId, table ); diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 5fd435f..6e93c5b 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -4,8 +4,8 @@ * - src/shell/grep-interceptor.ts (slow-path inside deeplake-shell) * * Responsibilities: - * 1. searchDeeplakeTables: run parallel LIKE/ILIKE queries against both the - * memory table (summaries, column `summary`) AND the sessions table + * 1. searchDeeplakeTables: run one UNION ALL query across both the memory + * table (summaries, column `summary`) AND the sessions table * (raw dialogue, column `message` JSONB), returning {path, content}. * 2. normalizeSessionContent: when a row comes from a session path, turn the * single-line JSON blob into multi-line "Speaker: text" so the standard @@ -44,6 +44,10 @@ export interface SearchOptions { likeOp: "LIKE" | "ILIKE"; /** LIKE-escaped pattern (via sqlLike). */ escapedPattern: string; + /** Optional safe literal anchor for regex searches (e.g. foo.*bar → foo). */ + prefilterPattern?: string; + /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ + prefilterPatterns?: string[]; /** Per-table row cap. */ limit?: number; } @@ -169,7 +173,7 @@ export function normalizeContent(path: string, raw: string): string { let obj: any; try { obj = JSON.parse(raw); } catch { return raw; } - // ── LoCoMo benchmark shape: { turns: [...] } ───────────────────────────── + // ── Turn-array session shape: { turns: [...] 
} ─────────────────────────── if (Array.isArray(obj.turns)) { const header: string[] = []; if (obj.date_time) header.push(`date: ${obj.date_time}`); @@ -225,10 +229,24 @@ export function normalizeContent(path: string, raw: string): string { // ── SQL search (both tables in parallel) ──────────────────────────────────── +function buildPathCondition(targetPath: string): string { + if (!targetPath || targetPath === "/") return ""; + const clean = targetPath.replace(/\/+$/, ""); + if (/[*?]/.test(clean)) { + const likePattern = sqlLike(clean).replace(/\*/g, "%").replace(/\?/g, "_"); + return `path LIKE '${likePattern}' ESCAPE '\\'`; + } + const base = clean.split("/").pop() ?? ""; + if (base.includes(".")) { + return `path = '${sqlStr(clean)}'`; + } + return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`; +} + /** * Dual-table LIKE/ILIKE search. Casts `summary` (TEXT) and `message` (JSONB) - * to ::text so the same predicate works across both. Both queries run in - * parallel; if one fails, the other's rows are still returned. + * to ::text so the same predicate works across both. The lookup always goes + * through a single UNION ALL query so one grep maps to one SQL search. */ export async function searchDeeplakeTables( api: DeeplakeApi, @@ -236,31 +254,147 @@ export async function searchDeeplakeTables( sessionsTable: string, opts: SearchOptions, ): Promise { - const { pathFilter, contentScanOnly, likeOp, escapedPattern } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; const limit = opts.limit ?? 100; + const filterPatterns = contentScanOnly + ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? 
[prefilterPattern] : [])) + : [escapedPattern]; + const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + + const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + + const rows = await api.query( + `SELECT path, content, source_order, creation_date FROM (` + + `(${memQuery}) UNION ALL (${sessQuery})` + + `) AS combined ORDER BY path, source_order, creation_date` + ); + + return rows.map(row => ({ + path: String(row["path"]), + content: String(row["content"] ?? ""), + })); +} - const memFilter = contentScanOnly ? "" : ` AND summary::text ${likeOp} '%${escapedPattern}%'`; - const sessFilter = contentScanOnly ? "" : ` AND message::text ${likeOp} '%${escapedPattern}%'`; +/** Build a LIKE pathFilter clause for a `path` column. Returns "" if targetPath is root or empty. */ +export function buildPathFilter(targetPath: string): string { + const condition = buildPathCondition(targetPath); + return condition ? ` AND ${condition}` : ""; +} - const memQuery = `SELECT path, summary::text AS content FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; +/** Build one combined pathFilter clause for multiple grep targets. 
*/ +export function buildPathFilterForTargets(targetPaths: string[]): string { + if (targetPaths.some((targetPath) => !targetPath || targetPath === "/")) return ""; + const conditions = [...new Set( + targetPaths + .map((targetPath) => buildPathCondition(targetPath)) + .filter((condition): condition is string => condition.length > 0), + )]; + if (conditions.length === 0) return ""; + if (conditions.length === 1) return ` AND ${conditions[0]}`; + return ` AND (${conditions.join(" OR ")})`; +} - const [memRows, sessRows] = await Promise.all([ - api.query(memQuery).catch(() => []), - api.query(sessQuery).catch(() => []), - ]); +/** + * Extract a safe literal substring from a regex-like grep pattern. + * Only patterns composed of plain text plus `.*` wildcards qualify. + * Example: `foo.*bar` → `foo` (or `bar`), `colou?r` → null. + */ +export function extractRegexLiteralPrefilter(pattern: string): string | null { + if (!pattern) return null; - const rows: ContentRow[] = []; - for (const r of memRows) rows.push({ path: String(r.path), content: String(r.content ?? "") }); - for (const r of sessRows) rows.push({ path: String(r.path), content: String(r.content ?? "") }); - return rows; + const parts: string[] = []; + let current = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) return null; + if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; + current += next; + i++; + continue; + } + if (ch === ".") { + if (pattern[i + 1] === "*") { + if (current) parts.push(current); + current = ""; + i++; + continue; + } + return null; + } + if ("|()[]{}+?^$".includes(ch) || ch === "*") return null; + current += ch; + } + if (current) parts.push(current); + + const literal = parts.reduce((best, part) => part.length > best.length ? part : best, ""); + return literal.length >= 2 ? literal : null; } -/** Build a LIKE pathFilter clause for a `path` column. Returns "" if targetPath is root or empty. 
*/ -export function buildPathFilter(targetPath: string): string { - if (!targetPath || targetPath === "/") return ""; - const clean = targetPath.replace(/\/+$/, ""); - return ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; +export function extractRegexAlternationPrefilters(pattern: string): string[] | null { + if (!pattern.includes("|")) return null; + + const parts: string[] = []; + let current = ""; + let escaped = false; + + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + current += `\\${ch}`; + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "|") { + if (!current) return null; + parts.push(current); + current = ""; + continue; + } + if ("()[]{}^$".includes(ch)) return null; + current += ch; + } + + if (escaped || !current) return null; + parts.push(current); + + const literals = [...new Set( + parts + .map((part) => extractRegexLiteralPrefilter(part)) + .filter((part): part is string => typeof part === "string" && part.length >= 2), + )]; + return literals.length > 0 ? literals : null; +} + +export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: string): SearchOptions { + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + return { + pathFilter: buildPathFilter(targetPath), + contentScanOnly: hasRegexMeta, + likeOp: params.ignoreCase ? "ILIKE" : "LIKE", + escapedPattern: sqlLike(params.pattern), + prefilterPattern: literalPrefilter ? 
sqlLike(literalPrefilter) : undefined, + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + }; +} + +function buildContentFilter( + column: string, + likeOp: "LIKE" | "ILIKE", + patterns: string[], +): string { + if (patterns.length === 0) return ""; + if (patterns.length === 1) return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; + return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; } // ── Regex refinement (line-by-line grep) ──────────────────────────────────── @@ -329,13 +463,7 @@ export async function grepBothTables( params: GrepMatchParams, targetPath: string, ): Promise { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, { - pathFilter: buildPathFilter(targetPath), - contentScanOnly: hasRegexMeta, - likeOp: params.ignoreCase ? "ILIKE" : "LIKE", - escapedPattern: sqlLike(params.pattern), - }); + const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); // Defensive path dedup — memory and sessions tables use disjoint path // prefixes in every schema we ship (/summaries/… vs /sessions/…), so the // overlap is theoretical, but we dedupe to match grep-interceptor.ts and diff --git a/src/shell/grep-interceptor.ts b/src/shell/grep-interceptor.ts index 290ec00..debd0cd 100644 --- a/src/shell/grep-interceptor.ts +++ b/src/shell/grep-interceptor.ts @@ -4,14 +4,14 @@ import yargsParser from "yargs-parser"; import type { DeeplakeFs } from "./deeplake-fs.js"; import { + buildGrepSearchOptions, + buildPathFilterForTargets, searchDeeplakeTables, - buildPathFilter, normalizeContent, refineGrepMatches, type GrepMatchParams, type ContentRow, } from "./grep-core.js"; -import { sqlLike } from "../utils/sql.js"; const MAX_FALLBACK_CANDIDATES = 500; @@ -71,28 +71,18 @@ export function createGrepCommand( countOnly: Boolean(parsed.c || 
parsed["count"]), }; - const likeOp = matchParams.ignoreCase ? "ILIKE" : "LIKE"; - const hasRegexMeta = !matchParams.fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const escapedPattern = sqlLike(pattern); - - // Targets can be multiple; we run one SQL round per distinct target so the - // per-table pathFilter can prune server-side. In practice targets is 1-2 - // entries, so the cost is negligible and still faster than the old shell. let rows: ContentRow[] = []; try { - const perTarget = await Promise.race([ - Promise.all(targets.map(t => - searchDeeplakeTables(client, table, sessionsTable ?? "sessions", { - pathFilter: buildPathFilter(t), - contentScanOnly: hasRegexMeta, - likeOp, - escapedPattern, - limit: 100, - }) - )), + const searchOptions = { + ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), + pathFilter: buildPathFilterForTargets(targets), + limit: 100, + }; + const queryRows = await Promise.race([ + searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions), new Promise((_, reject) => setTimeout(() => reject(new Error("timeout")), 3000)), ]); - for (const batch of perTarget) rows.push(...batch); + rows.push(...queryRows); } catch { rows = []; // fall through to in-memory fallback } diff --git a/src/utils/direct-run.ts b/src/utils/direct-run.ts new file mode 100644 index 0000000..85a4c92 --- /dev/null +++ b/src/utils/direct-run.ts @@ -0,0 +1,13 @@ +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +export function isDirectRun(metaUrl: string): boolean { + const entry = process.argv[1]; + if (!entry) return false; + + try { + return resolve(fileURLToPath(metaUrl)) === resolve(entry); + } catch { + return false; + } +} diff --git a/src/utils/output-cap.ts b/src/utils/output-cap.ts new file mode 100644 index 0000000..c6dea35 --- /dev/null +++ b/src/utils/output-cap.ts @@ -0,0 +1,83 @@ +/** + * Cap large tool outputs before they reach Claude Code. 
+ * + * Claude Code's Bash tool silently persists any tool_result larger than + * ~16 KB to disk and replaces it with a 2 KB "preview" + a path to the + * persisted file. In the locomo `baseline_cloud_100qa_fix123` run, 11 + * out of 14 losing QAs that hit this path NEVER recovered — the model + * saw a 2 KB slice of grep output and gave up instead of reading the + * persisted file. For our workload 8 KB of meaningful content is + * consistently more useful to the model than 2 KB + a dangling file + * pointer, so we cap the plugin-returned output below that threshold + * and replace the tail with a footer that tells the model how to + * narrow the next call. + * + * The cap is applied at line boundaries to keep grep / cat output + * structure intact. A short footer indicates how many lines / bytes + * were elided and suggests refinements ("pipe to | head -N" or + * "tighten the pattern"). + */ + +export const CLAUDE_OUTPUT_CAP_BYTES = 8 * 1024; + +function byteLen(str: string): number { + return Buffer.byteLength(str, "utf8"); +} + +export interface CapOutputOptions { + /** Hint shown in the footer. Examples: "grep", "cat", "for-loop". */ + kind?: string; + /** Override the cap size (bytes). Defaults to CLAUDE_OUTPUT_CAP_BYTES. */ + maxBytes?: number; +} + +/** + * If `output` fits in the cap, return it unchanged. Otherwise truncate + * at the last newline that keeps the total (including footer) under the + * cap, and append a footer describing what was elided. + */ +export function capOutputForClaude(output: string, options: CapOutputOptions = {}): string { + const maxBytes = options.maxBytes ?? CLAUDE_OUTPUT_CAP_BYTES; + if (byteLen(output) <= maxBytes) return output; + + const kind = options.kind ?? "output"; + // Reserve ~200 bytes for the footer so it always fits within maxBytes. + const footerReserve = 220; + const budget = Math.max(1, maxBytes - footerReserve); + + // Find the last newline before the byte budget. 
Walk forward building + // the slice so the byte boundary stays valid even for multibyte UTF-8. + let running = 0; + const lines = output.split("\n"); + const keptLines: string[] = []; + for (const line of lines) { + const lineBytes = byteLen(line) + 1; // +1 for the newline + if (running + lineBytes > budget) break; + keptLines.push(line); + running += lineBytes; + } + + if (keptLines.length === 0) { + // A single line is already over budget — take a prefix and mark it. + // `Buffer.subarray` (non-deprecated replacement for `.slice`) cuts at a + // byte boundary, which can split a multi-byte UTF-8 sequence and leak + // U+FFFD into the output. Back up to the last valid UTF-8 start byte + // (any byte whose top two bits aren't `10xxxxxx` — i.e. not a + // continuation byte) so `toString("utf8")` decodes cleanly. + const buf = Buffer.from(output, "utf8"); + let cutByte = Math.min(budget, buf.length); + while (cutByte > 0 && (buf[cutByte] & 0xc0) === 0x80) cutByte--; + const slice = buf.subarray(0, cutByte).toString("utf8"); + const footer = `\n... [${kind} truncated: ${(byteLen(output) / 1024).toFixed(1)} KB total; refine with '| head -N' or a tighter pattern]`; + return slice + footer; + } + + // `split("\n")` on `"a\nb\n"` produces `["a", "b", ""]` — the trailing + // empty entry is a newline terminator, not a real extra line. Counting + // it would over-report the elided-line tally in the footer. + const totalLines = lines.length - (lines[lines.length - 1] === "" ? 1 : 0); + const elidedLines = Math.max(0, totalLines - keptLines.length); + const elidedBytes = byteLen(output) - byteLen(keptLines.join("\n")); + const footer = `\n... 
[${kind} truncated: ${elidedLines} more lines (${(elidedBytes / 1024).toFixed(1)} KB) elided — refine with '| head -N' or a tighter pattern]`; + return keptLines.join("\n") + footer; +} diff --git a/vitest.config.ts b/vitest.config.ts index 864e7a4..dccf756 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -59,6 +59,41 @@ export default defineConfig({ functions: 90, lines: 90, }, + "src/hooks/session-queue.ts": { + statements: 80, + branches: 80, + functions: 80, + lines: 80, + }, + // fix/index-md-include-sessions — 5-fix PR stacked on PR #61. + // output-cap.ts is new in this PR (fix #5); virtual-table-query.ts was + // heavily modified by fix #1 (index.md builder / fallback) and fix #4 + // (ESCAPE '\' on LIKE clauses). Held at 90 to match the rest of the + // plugin-hot-path files already at that bar. + "src/utils/output-cap.ts": { + statements: 90, + branches: 90, + functions: 90, + lines: 90, + }, + "src/hooks/virtual-table-query.ts": { + statements: 90, + branches: 90, + functions: 90, + lines: 90, + }, + "src/hooks/pre-tool-use.ts": { + statements: 90, + branches: 90, + functions: 90, + lines: 90, + }, + "src/hooks/memory-path-utils.ts": { + statements: 90, + branches: 90, + functions: 90, + lines: 90, + }, }, }, },