From dd8a0e75f1eee6c24810ebc08b9eaaf8da3156c6 Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 13:54:48 -0700 Subject: [PATCH 1/7] Align session retrieval with local transcript files --- claude-code/bundle/pre-tool-use.js | 681 +++++++++++++++-- claude-code/bundle/shell/deeplake-shell.js | 582 ++++++++++++--- .../tests/bash-command-compiler.test.ts | 86 +++ .../tests/benchmark-replay-parity.test.ts | 352 +++++++++ claude-code/tests/grep-core.test.ts | 466 ++++++++++-- claude-code/tests/grep-direct.test.ts | 5 +- claude-code/tests/grep-interceptor.test.ts | 15 +- claude-code/tests/hooks-source.test.ts | 53 ++ claude-code/tests/sessions-table.test.ts | 26 + claude-code/tests/virtual-table-query.test.ts | 113 ++- codex/bundle/pre-tool-use.js | 683 ++++++++++++++++-- codex/bundle/shell/deeplake-shell.js | 582 ++++++++++++--- src/hooks/bash-command-compiler.ts | 59 +- src/hooks/codex/pre-tool-use.ts | 24 +- src/hooks/grep-direct.ts | 15 +- src/hooks/memory-path-utils.ts | 61 +- src/hooks/pre-tool-use.ts | 21 +- src/hooks/virtual-table-query.ts | 130 +++- src/shell/deeplake-fs.ts | 71 +- src/shell/grep-core.ts | 397 ++++++++-- src/shell/grep-interceptor.ts | 8 +- src/utils/retrieval-mode.ts | 14 + src/utils/summary-format.ts | 184 +++++ 23 files changed, 4067 insertions(+), 561 deletions(-) create mode 100644 claude-code/tests/benchmark-replay-parity.test.ts create mode 100644 src/utils/retrieval-mode.ts create mode 100644 src/utils/summary-format.ts diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index e316382..12b65db 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -289,6 +289,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. 
*/ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -414,7 +437,28 @@ function isDirectRun(metaUrl) { } } +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
500); +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -577,24 +621,9 @@ function normalizeContent(path, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t) => { - const sp = String(t?.speaker ?? t?.name ?? "?").trim(); - const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t) => { const i = t.indexOf(""); @@ -638,14 +667,34 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); + const sessionsOnly = isSessionsOnlyMode(); + const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? 
"", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -666,6 +715,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -692,13 +745,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -726,33 +780,177 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? 
sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function toSqlRegexPattern(pattern, ignoreCase) { + if (!pattern) + return null; + if (ignoreCase) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function isSqlRegexPushdownSafe(pattern) { + return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return 
null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + if (!isSqlRegexPushdownSafe(pattern)) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ~ '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -786,12 +984,12 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { } return output; } -async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { +async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, forceMultiFilePrefix) { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } // dist/src/hooks/grep-direct.js @@ -877,7 +1075,7 @@ function parseBashGrep(cmd) { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns = []; let ti = 1; while (ti < tokens.length) { @@ -969,6 +1167,8 @@ function parseBashGrep(cmd) { break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -1010,6 +1210,7 @@ function parseBashGrep(cmd) { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, filesOnly, @@ -1032,25 +1233,240 @@ async function handleGrepDirect(api, table, sessionsTable, params) { invertMatch: params.invertMatch, fixedString: params.fixedString }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables(api, table, 
sessionsTable, matchParams, params.targetPath, params.recursive ? true : void 0); return output.join("\n") || "(no matches)"; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename(path) { + const trimmed = path.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? 
extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts = new Date(parsed); + const yyyy = ts.getUTCFullYear(); + const mm = String(ts.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts.getUTCDate()).padStart(2, "0"); + const hh = String(ts.getUTCHours()).padStart(2, "0"); + const min = String(ts.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path = typeof row.path === "string" ? row.path : ""; + if (!path) + return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? 
row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label 
=== "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + // dist/src/hooks/virtual-table-query.js function normalizeSessionPart(path, content) { return normalizeContent(path, content); } function buildVirtualIndexContent(rows) { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry) { + const session 
= entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} function buildUnionQuery(memoryQuery, sessionsQuery) { return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; } @@ -1065,6 +1481,9 @@ function buildDirFilter(dirs) { return ` WHERE ${clauses.join(" OR ")}`; } async function queryUnionRows(api, memoryQuery, sessionsQuery) { + if (isSessionsOnlyMode()) { + return api.query(`SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date`); + } const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -1081,7 +1500,13 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP const result = new Map(uniquePaths.map((path) => [path, null])); if (uniquePaths.length === 0) return result; - const inList = buildInList(uniquePaths); + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } + const queryPaths = isIndexDisabled() ? 
uniquePaths.filter((path) => path !== "/index.md") : uniquePaths; + if (queryPaths.length === 0) + return result; + const inList = buildInList(queryPaths); const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); const memoryHits = /* @__PURE__ */ new Map(); const sessionHits = /* @__PURE__ */ new Map(); @@ -1099,7 +1524,7 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP sessionHits.set(path, current); } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? null); continue; @@ -1109,8 +1534,8 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP result.set(path, sessionParts.join("\n")); } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { - const rows2 = await api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []); + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const rows2 = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`).catch(() => []); result.set("/index.md", buildVirtualIndexContent(rows2)); } return result; @@ -1172,11 +1597,19 @@ function splitTopLevel(input, operators) { const parts = []; let current = ""; let quote = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { 
+ current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === '"') + escaped = true; current += ch; continue; } @@ -1185,6 +1618,11 @@ function splitTopLevel(input, operators) { current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { const trimmed2 = current.trim(); @@ -1196,7 +1634,7 @@ function splitTopLevel(input, operators) { } current += ch; } - if (quote) + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) @@ -1260,8 +1698,8 @@ function expandBraceToken(token) { return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); } function stripAllowedModifiers(segment) { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); - const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "").replace(/\s2>&1(?=\s*(?:\||$))/g, "").trim(); return { clean, ignoreMissing }; } function hasUnsupportedRedirection(segment) { @@ -1330,7 +1768,7 @@ function isValidPipelineHeadTailStage(stage) { return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); return false; } -function parseFindNamePatterns(tokens) { +function parseFindSpec(tokens) { const patterns = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -1348,9 +1786,22 @@ function parseFindNamePatterns(tokens) { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) + return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if (terminator !== "\\;" && terminator !== ";" || target !== "{}") + return null; + return { + patterns, + 
execGrepCmd: execTokens.slice(0, -1).join(" ") + }; + } return null; } - return patterns.length > 0 ? patterns : null; + return patterns.length > 0 ? { patterns, execGrepCmd: null } : null; } function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); @@ -1437,15 +1888,32 @@ function parseCompiledSegment(segment) { const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams2 = parseBashGrep(execGrepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") @@ -1631,20 +2099,35 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, statSync, writeFileSync as writeFileSync2 } from "node:fs"; import { join as join4 } from "node:path"; import { homedir as homedir3 } from "node:os"; var log3 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", 
"query-cache"); var INDEX_CACHE_FILE = "index.md"; +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join4(cacheRoot, sessionId); } +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + rmSync(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } +} function readCachedIndexContent(sessionId, deps = {}) { const { logFn = log3 } = deps; try { - return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if (Date.now() - stats.mtimeMs > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync3(cachePath, "utf-8"); } catch (e) { if (e?.code === "ENOENT") return null; @@ -1762,11 +2245,66 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "case", "esac" ]); +function splitSafeStages(cmd) { + const stages = []; + let current = ""; + let quote = null; + let escaped = false; + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + current += ch; + continue; + } + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) + stages.push(current.trim()); + current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) + stages.push(current.trim()); + current = 
""; + continue; + } + current += ch; + } + if (quote || escaped) + return null; + if (current.trim()) + stages.push(current.trim()); + return stages; +} function isSafe(cmd) { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) + return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? ""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) @@ -1848,6 +2386,7 @@ function extractGrepParams(toolName, toolInput, shellCmd) { return { pattern: toolInput.pattern ?? "", targetPath: rewritePaths(toolInput.path ?? "") || "/", + recursive: true, ignoreCase: !!toolInput["-i"], wordMatch: false, filesOnly: outputMode === "files_with_matches", @@ -1884,7 +2423,7 @@ async function processPreToolUse(input, deps = {}) { const readVirtualPathContentsWithCache = async (cachePaths) => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; const remainingPaths = cachedIndex === null ? uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); if (cachedIndex !== null) { result.set("/index.md", cachedIndex); @@ -1974,21 +2513,13 @@ async function processPreToolUse(input, deps = {}) { } if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; + let content = !isIndexDisabled() && virtualPath === "/index.md" ? 
readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"]; - const proj = r["project"] || ""; - const desc = (r["description"] || "").slice(0, 120); - const date = (r["creation_date"] || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { + const idxRows = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { if (virtualPath === "/index.md") { diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 5872059..84cf810 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66987,6 +66987,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e6; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -67099,10 +67122,31 @@ var DeeplakeApi = class { }; // dist/src/shell/deeplake-fs.js -import { basename as basename4, posix } from "node:path"; +import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
500); +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -67265,24 +67309,9 @@ function normalizeContent(path2, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t6) => { const i11 = t6.indexOf(""); @@ -67326,14 +67355,34 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); + const sessionsOnly = isSessionsOnlyMode(); + const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? 
"", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67364,6 +67413,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i11 + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i11++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -67390,13 +67443,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i11 = 0; i11 < pattern.length; i11++) { - const ch = pattern[i11]; + for (let i11 = 0; i11 < unwrapped.length; i11++) { + const ch = unwrapped[i11]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -67424,33 +67478,177 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? 
sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function toSqlRegexPattern(pattern, ignoreCase) { + if (!pattern) + return null; + if (ignoreCase) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function isSqlRegexPushdownSafe(pattern) { + return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i11 !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i11++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case 
"p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i11)) { + out += "("; + i11 += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i11))) { + const named = pattern.slice(i11).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i11 += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i11 + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + if (!isSqlRegexPushdownSafe(pattern)) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ~ '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -67485,6 +67683,234 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename4(path2) { + const trimmed = path2.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re9 = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function extractHeaderField(text, field) { + const re9 = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? 
extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts3 = new Date(parsed); + const yyyy = ts3.getUTCFullYear(); + const mm = String(ts3.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts3.getUTCDate()).padStart(2, "0"); + const hh = String(ts3.getUTCHours()).padStart(2, "0"); + const min = String(ts3.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path2 = typeof row.path === "string" ? row.path : ""; + if (!path2) + return null; + if (path2.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path2)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? 
row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename4(path2) || path2; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path: path2, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof 
row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + +// dist/src/hooks/virtual-table-query.js +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a15, b26) => (b26.sortDate || "").localeCompare(a15.sortDate || "") || a15.path.localeCompare(b26.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); + } + return lines.join("\n"); +} +function formatEntryLink(entry) { + const session = entry.source ? 
` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a15, b26) => b26[1] - a15[1] || a15[0].localeCompare(b26[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} + // dist/src/shell/deeplake-fs.js var BATCH_SIZE = 10; var PREFETCH_BATCH_SIZE = 50; @@ -67542,6 +67968,8 @@ var DeeplakeFs = class _DeeplakeFs { // Paths that live in the sessions table (multi-row, read by concatenation) sessionPaths = /* @__PURE__ */ new Set(); sessionsTable = null; + sessionsOnly = false; + indexDisabled = false; constructor(client, table, mountPoint) { this.client = client; this.table = table; @@ -67553,9 +67981,11 @@ var DeeplakeFs = class _DeeplakeFs { static async create(client, table, mount = "/memory", sessionsTable) { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; + fs3.sessionsOnly = isSessionsOnlyMode(); + fs3.indexDisabled = isIndexDisabled(); await client.ensureTable(); let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs3.sessionsOnly ? 
Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -67611,7 +68041,7 @@ var DeeplakeFs = class _DeeplakeFs { this.pending.delete(filePath); this.flushed.delete(filePath); const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + this.dirs.get(parent)?.delete(basename5(filePath)); } // ── flush / write batching ──────────────────────────────────────────────── scheduleFlush() { @@ -67674,46 +68104,8 @@ var DeeplakeFs = class _DeeplakeFs { } // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } - } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) - continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? 
sessionPathsByKey.get(baseName); - const convLink = convPath ? `[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + const rows = await this.client.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC`); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── /** @@ -67802,7 +68194,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); if (this.dirs.has(p22) && !this.files.has(p22)) throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !this.files.has(p22)) { const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); if (realRows.length > 0 && realRows[0]["summary"]) { const text2 = realRows[0]["summary"]; @@ -67847,13 +68239,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length, @@ -67872,13 +68264,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length @@ -67910,7 +68302,7 @@ var DeeplakeFs = class _DeeplakeFs { // ── IFileSystem: metadata ───────────────────────────────────────────────── async exists(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return true; return this.files.has(p22) || this.dirs.has(p22); } @@ -67918,7 +68310,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); const isFile = this.files.has(p22); const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, @@ -67958,7 +68350,7 @@ var DeeplakeFs = class _DeeplakeFs { } async realpath(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return 
p22; if (!this.files.has(p22) && !this.dirs.has(p22)) throw fsErr("ENOENT", "no such file or directory", p22); @@ -67983,14 +68375,14 @@ var DeeplakeFs = class _DeeplakeFs { const parent = parentOf(p22); if (!this.dirs.has(parent)) this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); + this.dirs.get(parent).add(basename5(p22)); } async readdir(path2) { const p22 = normPath(path2); if (!this.dirs.has(p22)) throw fsErr("ENOTDIR", "not a directory", p22); const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -68002,7 +68394,7 @@ var DeeplakeFs = class _DeeplakeFs { const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || !this.sessionsOnly && !this.indexDisabled && child === "/index.md") && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false }; @@ -68038,7 +68430,7 @@ var DeeplakeFs = class _DeeplakeFs { for (const fp of safeToDelete) this.removeFromTree(fp); this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + this.dirs.get(parentOf(p22))?.delete(basename5(p22)); if (safeToDelete.length > 0) { const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); @@ -69112,8 +69504,7 @@ function createGrepCommand(client, fs3, table, sessionsTable) { try { const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), - pathFilter: buildPathFilterForTargets(targets), - limit: 100 + pathFilter: buildPathFilterForTargets(targets) }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? 
"sessions", searchOptions), @@ -69136,7 +69527,8 @@ function createGrepCommand(client, fs3, table, sessionsTable) { } } const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : void 0; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? output.join("\n") + "\n" : "", stderr: "", diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index 3bb90a7..e6d49be 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -61,6 +61,10 @@ describe("bash-command-compiler parsing", () => { clean: "cat /a", ignoreMissing: true, }); + expect(stripAllowedModifiers("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10")).toEqual({ + clean: "find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; | head -10", + ignoreMissing: true, + }); expect(stripAllowedModifiers("cat /a 2>&1 | head -2")).toEqual({ clean: "cat /a | head -2", ignoreMissing: false, @@ -161,6 +165,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: "/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -176,6 +181,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: "/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -191,6 +197,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "foo", targetPath: "/summaries", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, @@ -208,6 +215,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "launch", targetPath: "/", + recursive: false, ignoreCase: 
false, wordMatch: false, filesOnly: true, @@ -225,6 +233,7 @@ describe("bash-command-compiler parsing", () => { params: { pattern: "launch", targetPath: "/", + recursive: false, ignoreCase: false, wordMatch: false, filesOnly: true, @@ -235,6 +244,40 @@ describe("bash-command-compiler parsing", () => { }, lineLimit: 1, }); + expect(parseCompiledSegment("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10")).toEqual({ + kind: "find_grep", + dir: "/sessions", + patterns: ["*.json"], + params: { + pattern: "Melanie", + targetPath: "{}", + recursive: false, + ignoreCase: false, + wordMatch: false, + filesOnly: true, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 10, + }); + expect(parseCompiledSegment("grep -i 'age\\|birthday\\|born.*19\\|born.*20' /sessions/*.json 2>/dev/null | head -3")).toEqual({ + kind: "grep", + params: { + pattern: "age\\|birthday\\|born.*19\\|born.*20", + targetPath: "/sessions/*.json", + recursive: false, + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 3, + }); }); it("rejects unsupported segments and command shapes", () => { @@ -454,4 +497,47 @@ describe("bash-command-compiler execution", () => { ); expect(output).toBe("/summaries/a.md"); }); + + it("compiles benchmark trace find -exec grep -l pipelines into the same find_grep plan", async () => { + const findVirtualPathsFn = vi.fn(async () => [ + "/sessions/conv_0_session_2.json", + "/sessions/conv_0_session_5.json", + "/sessions/conv_0_session_8.json", + ]); + const readVirtualPathContentsFn = vi.fn(async () => new Map([ + ["/sessions/conv_0_session_2.json", "{\"dialogue\":[{\"speaker\":\"Melanie\",\"text\":\"camping next month\"}]}"], + ["/sessions/conv_0_session_5.json", "{\"dialogue\":[{\"speaker\":\"Caroline\",\"text\":\"book club\"}]}"], + ["/sessions/conv_0_session_8.json", 
"{\"dialogue\":[{\"speaker\":\"Melanie\",\"text\":\"museum trip\"}]}"], + ])); + + const output = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10", + { + findVirtualPathsFn: findVirtualPathsFn as any, + readVirtualPathContentsFn: readVirtualPathContentsFn as any, + }, + ); + + expect(findVirtualPathsFn).toHaveBeenCalledWith( + expect.anything(), + "memory", + "sessions", + "/sessions", + "%.json", + ); + expect(readVirtualPathContentsFn).toHaveBeenCalledWith( + expect.anything(), + "memory", + "sessions", + [ + "/sessions/conv_0_session_2.json", + "/sessions/conv_0_session_5.json", + "/sessions/conv_0_session_8.json", + ], + ); + expect(output).toBe("/sessions/conv_0_session_2.json\n/sessions/conv_0_session_8.json"); + }); }); diff --git a/claude-code/tests/benchmark-replay-parity.test.ts b/claude-code/tests/benchmark-replay-parity.test.ts new file mode 100644 index 0000000..e1e0c66 --- /dev/null +++ b/claude-code/tests/benchmark-replay-parity.test.ts @@ -0,0 +1,352 @@ +import { execFileSync } from "node:child_process"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { executeCompiledBashCommand } from "../../src/hooks/bash-command-compiler.js"; +import { handleGrepDirect, parseBashGrep } from "../../src/hooks/grep-direct.js"; +import { processPreToolUse } from "../../src/hooks/pre-tool-use.js"; + +type FixtureFile = { path: string; content: string }; + +type SessionTurn = { + speaker: string; + dia_id: string; + text: string; +}; + +const baseConfig = { + token: "token", + orgId: "org-1", + orgName: "Acme", + userName: "alice", + workspaceId: "default", + apiUrl: "https://api.example.com", + tableName: "memory", + sessionsTableName: "sessions", + memoryPath: 
"/tmp/.deeplake/memory", +}; + +function buildSessionFile( + sessionNumber: number, + turns: SessionTurn[], + dateTime = "8:56 pm on 20 July, 2023", +): FixtureFile { + const session = { + conversation_id: 0, + session_number: sessionNumber, + date_time: dateTime, + speakers: { + speaker_a: "Caroline", + speaker_b: "Melanie", + }, + turns, + }; + return { + path: `/sessions/conv_0_session_${sessionNumber}.json`, + content: `${JSON.stringify(session, null, 2)}\n`, + }; +} + +function rewriteForLocalRoot(command: string, root: string): string { + return command + .replaceAll("/sessions", `${root}/sessions`) + .replaceAll("/summaries", `${root}/summaries`) + .replaceAll("/index.md", `${root}/index.md`); +} + +function runLocalBash(root: string, command: string): string { + const localCommand = rewriteForLocalRoot(command, root); + try { + return execFileSync("/bin/bash", ["-lc", localCommand], { + encoding: "utf8", + }).trim(); + } catch (error: any) { + return String(error?.stdout ?? "").trim(); + } +} + +function writeFixture(files: FixtureFile[]): string { + const root = mkdtempSync(join(tmpdir(), "hivemind-benchmark-replay-")); + for (const file of files) { + const fullPath = join(root, file.path.slice(1)); + mkdirSync(join(fullPath, ".."), { recursive: true }); + writeFileSync(fullPath, file.content); + } + return root; +} + +function makeQueryRows(files: FixtureFile[]) { + return files.map((file) => ({ + path: file.path, + content: file.content, + })); +} + +function likePatternToRegExp(pattern: string): RegExp { + const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&"); + return new RegExp(`^${escaped.replaceAll("%", ".*").replaceAll("_", ".")}$`); +} + +async function runVirtualCommand(files: FixtureFile[], command: string): Promise { + const queryRows = makeQueryRows(files); + const grepHandler = async (_api: any, _memory: string, _sessions: string, params: any) => { + const api = { query: vi.fn().mockResolvedValue(queryRows) } as any; + return (await 
handleGrepDirect(api, "memory", "sessions", params)) ?? ""; + }; + + const compiled = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + command, + { + readVirtualPathContentsFn: vi.fn(async (_api, _memory, _sessions, paths: string[]) => new Map( + paths.map((path) => [path, files.find((file) => file.path === path)?.content ?? null]), + )) as any, + listVirtualPathRowsForDirsFn: vi.fn(async (_api, _memory, _sessions, dirs: string[]) => new Map( + dirs.map((dir) => [ + dir, + files + .filter((file) => file.path === dir || file.path.startsWith(`${dir.replace(/\/+$/, "")}/`)) + .map((file) => ({ path: file.path, size_bytes: Buffer.byteLength(file.content) })), + ]), + )) as any, + findVirtualPathsFn: vi.fn(async (_api, _memory, _sessions, dir: string, filenamePattern: string) => { + const dirPrefix = dir.replace(/\/+$/, "") || "/"; + const matcher = likePatternToRegExp(filenamePattern); + return files + .filter((file) => file.path.startsWith(`${dirPrefix}/`)) + .map((file) => file.path) + .filter((path) => matcher.test(path.slice(path.lastIndexOf("/") + 1))); + }) as any, + handleGrepDirectFn: grepHandler as any, + }, + ); + if (compiled !== null) return compiled.trim(); + + const grepParams = parseBashGrep(command); + if (!grepParams) { + throw new Error(`Command is neither compiled nor grep-direct: ${command}`); + } + return (await grepHandler(null, "memory", "sessions", grepParams)).trim(); +} + +describe("benchmark replay parity", () => { + const roots: string[] = []; + + afterEach(() => { + while (roots.length > 0) { + rmSync(roots.pop()!, { recursive: true, force: true }); + } + }); + + it("matches raw output for relationship-status grep", async () => { + const files = [ + buildSessionFile(13, [ + { dia_id: "D13:1", speaker: "Caroline", text: "I'm single and planning to adopt as a single parent." }, + { dia_id: "D13:2", speaker: "Caroline", text: "As a transgender woman, the support group changed my life." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'relationship|dating|partner|married|single|girlfriend|boyfriend' /sessions/conv_0_session_13.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I\'m single and planning to adopt as a single parent."'); + }); + + it("matches raw output for camping-location grep", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:12", speaker: "Melanie", text: "We camped near a mountain lake in a state park last summer." }, + { dia_id: "D10:13", speaker: "Caroline", text: "That sounds beautiful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'mountain|lake|forest|state|park|location|where|place' /sessions/conv_0_session_10.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We camped near a mountain lake in a state park last summer."'); + }); + + it("matches raw output for Dr. Seuss bookshelf grep", async () => { + const files = [ + buildSessionFile(6, [ + { dia_id: "D6:1", speaker: "Melanie", text: "We keep classic kids' books like Dr. Seuss on the bookshelf." }, + { dia_id: "D6:2", speaker: "Caroline", text: "That sounds perfect for the kids." }, + ]), + buildSessionFile(7, [ + { dia_id: "D7:1", speaker: "Caroline", text: "I just started a new counseling course." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E 'Dr. 
Seuss|bookshelf|books' /sessions/*.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We keep classic kids\' books like Dr. Seuss on the bookshelf."'); + }); + + it("keeps the 18th-birthday shell-loop case explicitly divergent by returning retry guidance", async () => { + const files = [ + buildSessionFile(12, [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const localCommand = "for file in /sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|year.*old\\|born\\|birthday\\|turn.*18\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\""; + const local = runLocalBash(root, localCommand); + expect(local).toContain("18th birthday"); + + const decision = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "for file in ~/.deeplake/memory/sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|year.*old\\|born\\|birthday\\|turn.*18\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\"", + }, + tool_use_id: "tu-bm-q12", + }, { + config: baseConfig as any, + }); + + expect(decision?.command).toContain("RETRY REQUIRED"); + expect(decision?.description).toContain("unsupported command"); + }); + + it("matches raw output for the direct-grep fallback after the blocked 18th-birthday loop", async () => { + const files = [ + buildSessionFile(12, [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'age|birthday|born.*19|born.*20' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "A friend made it for my 18th birthday ten years ago."'); + }); + + it("matches raw output for support-group date searches", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:1", speaker: "Caroline", text: "I joined the LGBTQ support group last Tuesday, July 18, 2023." }, + { dia_id: "D10:2", speaker: "Melanie", text: "That sounds like such a good step." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'support group|lgbtq support' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I joined the LGBTQ support group last Tuesday, July 18, 2023."'); + }); + + it("matches raw output for move-from-four-years-ago searches", async () => { + const files = [ + buildSessionFile(11, [ + { dia_id: "D11:1", speaker: "Caroline", text: "I moved here from Sweden four years ago." }, + { dia_id: "D11:2", speaker: "Melanie", text: "That must have been a big change." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'moved from|four year|4 year|sweden' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "I moved here from Sweden four years ago."'); + }); + + it("matches raw output for Melanie activity aggregation searches", async () => { + const files = [ + buildSessionFile(8, [ + { dia_id: "D8:1", speaker: "Melanie", text: "We tried a pottery workshop, went swimming, and planned our annual camping trip." }, + { dia_id: "D8:2", speaker: "Caroline", text: "That sounds like a full summer." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'pottery|swimming|camping' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "We tried a pottery workshop, went swimming, and planned our annual camping trip."'); + }); + + it("matches raw output for Melanie destress searches", async () => { + const files = [ + buildSessionFile(9, [ + { dia_id: "D9:1", speaker: "Melanie", text: "Running helps me destress after busy weeks." }, + { dia_id: "D9:2", speaker: "Caroline", text: "That makes sense." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'destress|stress|running|painting' /sessions/"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"text": "Running helps me destress after busy weeks."'); + }); + + it("matches raw output for q23-style summary markdown searches", async () => { + const files: FixtureFile[] = [ + { + path: "/summaries/locomo/conv_0_session_6_summary.md", + content: [ + "# Session 6", + "## Searchable Facts", + "- Melanie said Charlotte's Web was her favorite book as a child.", + "- The family keeps classic kids' books on the bookshelf.", + "", + ].join("\n"), + }, + { + path: "/summaries/locomo/conv_0_session_7_summary.md", + content: [ + "# Session 7", + "## Searchable Facts", + "- Caroline started a new counseling course.", + "", + ].join("\n"), + }, + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i -E 'book|read' /summaries/locomo/conv_0_session_*.md"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(local).toContain("Charlotte's Web"); + }); +}); diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 2a9a409..6c4623a 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -1,15 +1,18 @@ import { describe, it, expect, vi } from "vitest"; import { buildGrepSearchOptions, + buildSummaryBm25QueryText, normalizeContent, buildPathFilter, buildPathFilterForTargets, compileGrepRegex, extractRegexAlternationPrefilters, extractRegexLiteralPrefilter, + normalizeGrepRegexPattern, refineGrepMatches, searchDeeplakeTables, grepBothTables, + toSqlRegexPattern, } from "../../src/shell/grep-core.js"; // ── 
normalizeContent ──────────────────────────────────────────────────────── @@ -36,7 +39,7 @@ describe("normalizeContent: passthrough for non-session paths", () => { }); }); -describe("normalizeContent: turn-array session shape", () => { +describe("normalizeContent: transcript session shape", () => { const raw = JSON.stringify({ date_time: "1:56 pm on 8 May, 2023", speakers: { speaker_a: "Avery", speaker_b: "Jordan" }, @@ -46,77 +49,82 @@ describe("normalizeContent: turn-array session shape", () => { ], }); - it("emits date and speakers header", () => { + it("pretty-prints transcript JSON", () => { const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("date: 1:56 pm on 8 May, 2023"); - expect(out).toContain("speakers: Avery, Jordan"); + expect(out).toBe(`${JSON.stringify(JSON.parse(raw), null, 2)}\n`); }); - it("emits one line per turn with dia_id tag", () => { + it("preserves dia_id and turn text in the raw JSON view", () => { const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("[D1:1] Avery: Hey Jordan!"); - expect(out).toContain("[D1:2] Jordan: Hi Avery."); + expect(out).toContain('"dia_id": "D1:1"'); + expect(out).toContain('"text": "Hey Jordan!"'); + expect(out).toContain('"speaker": "Jordan"'); }); - it("falls back gracefully on turns without speaker/text", () => { + it("keeps sparse turns as canonical JSON", () => { const weird = JSON.stringify({ turns: [{}, { speaker: "X" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", weird); - // Must not crash; includes placeholder `?` for missing speaker - expect(out).toContain("?: "); - expect(out).toContain("X: "); + expect(out).toBe(`${JSON.stringify(JSON.parse(weird), null, 2)}\n`); }); - it("omits speakers header when both speaker fields are empty", () => { + it("preserves empty speaker metadata instead of synthesizing headers", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: 
"", speaker_b: "" }, }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).not.toContain("speakers:"); - expect(out).toContain("A: hi"); + expect(out).toContain('"speaker_a": ""'); + expect(out).toContain('"text": "hi"'); }); - it("emits only speaker_a when speaker_b is missing", () => { + it("preserves single-speaker metadata", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }], speakers: { speaker_a: "Alice" }, }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("speakers: Alice"); + expect(out).toContain('"speaker_a": "Alice"'); }); - it("falls back speaker->name when speaker field is absent on a turn", () => { + it("keeps alternate turn keys in the raw JSON view", () => { const raw = JSON.stringify({ turns: [{ name: "Avery", text: "hi" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("Avery: hi"); + expect(out).toContain('"name": "Avery"'); }); - it("falls back text->content when text field is absent on a turn", () => { + it("keeps content fallback fields in the raw JSON view", () => { const raw = JSON.stringify({ turns: [{ speaker: "X", content: "fallback" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("X: fallback"); + expect(out).toContain('"content": "fallback"'); }); - it("omits dia_id prefix when the turn has no dia_id", () => { + it("leaves missing dia_id fields absent", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" }] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); - expect(out).toContain("A: hi"); - expect(out).not.toMatch(/\[\]/); + expect(out).toContain('"speaker": "A"'); + expect(out).not.toContain('"dia_id"'); }); - it("emits turns without date/speakers when both are missing", () => { + it("keeps transcript rows without date or speakers", () => { const raw = JSON.stringify({ turns: [{ speaker: "A", text: "hi" 
}] }); const out = normalizeContent("/sessions/alice/chat_1.json", raw); expect(out).not.toContain("date:"); expect(out).not.toContain("speakers:"); - expect(out).toContain("A: hi"); + expect(out).toContain('"speaker": "A"'); }); - it("returns raw when turns produce an empty serialization", () => { + it("pretty-prints empty transcript arrays", () => { const empty = JSON.stringify({ turns: [] }); - // No header, no turns → trimmed output is empty → fallback to raw const out = normalizeContent("/sessions/alice/chat_1.json", empty); - expect(out).toBe(empty); + expect(out).toBe(`${JSON.stringify(JSON.parse(empty), null, 2)}\n`); + }); + + it("pretty-prints dialogue-array transcripts too", () => { + const dialogue = JSON.stringify({ + dialogue: [{ speaker: "Melanie", text: "camping next month" }], + }); + const out = normalizeContent("/sessions/conv_0_session_2.json", dialogue); + expect(out).toBe(`${JSON.stringify(JSON.parse(dialogue), null, 2)}\n`); }); }); @@ -627,6 +635,26 @@ describe("searchDeeplakeTables", () => { expect(sql).toContain("UNION ALL"); }); + it("uses text BM25 operator for summary searches before ILIKE fallback", async () => { + const api = { + query: vi.fn().mockImplementationOnce(async () => []), + ensureSummaryBm25Index: vi.fn().mockResolvedValue(undefined), + } as any; + await searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: " AND (path = '/x' OR path LIKE '/x/%')", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + bm25QueryText: "book novel literature", + limit: 50, + }); + expect(api.ensureSummaryBm25Index).toHaveBeenCalledWith("memory"); + expect(api.query).toHaveBeenCalledTimes(1); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'book novel literature') DESC"); + expect(sql).toContain('FROM "sessions"'); + }); + it("skips LIKE filter when contentScanOnly is true (regex-in-memory mode)", async () => { const api = mockApi([]); await 
searchDeeplakeTables(api, "m", "s", { @@ -638,6 +666,7 @@ describe("searchDeeplakeTables", () => { const sql = api.query.mock.calls[0][0] as string; expect(sql).not.toContain("summary::text LIKE"); expect(sql).not.toContain("message::text LIKE"); + expect(sql).not.toContain("message::text ~"); }); it("uses a safe literal prefilter for regex scans when available", async () => { @@ -647,27 +676,123 @@ describe("searchDeeplakeTables", () => { contentScanOnly: true, likeOp: "LIKE", escapedPattern: "foo.*bar", + regexPattern: "foo.*bar", prefilterPattern: "foo", }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("summary::text LIKE '%foo%'"); - expect(sql).toContain("message::text LIKE '%foo%'"); + expect(sql).toContain("summary::text ~ 'foo.*bar'"); + expect(sql).toContain("message::text ~ 'foo.*bar'"); + expect(sql).toContain("LIKE '%foo%'"); }); - it("expands alternation prefilters into OR clauses instead of literal pipes", async () => { + it("uses regex predicates for alternation patterns instead of literal pipes", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: true, likeOp: "LIKE", escapedPattern: "relationship|partner|married", + regexPattern: "relationship|partner|married", prefilterPatterns: ["relationship", "partner", "married"], }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("summary::text LIKE '%relationship%'"); - expect(sql).toContain("summary::text LIKE '%partner%'"); - expect(sql).toContain("summary::text LIKE '%married%'"); - expect(sql).not.toContain("relationship|partner|married"); + expect(sql).toContain("summary::text ~ 'relationship|partner|married'"); + expect(sql).toContain("message::text ~ 'relationship|partner|married'"); + }); + + it("skips SQL regex pushdown for ignore-case regex scans", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: true, + 
likeOp: "ILIKE", + escapedPattern: "relationship|partner|married", + regexPattern: "relationship|partner|married", + prefilterPatterns: ["relationship", "partner", "married"], + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("summary::text ILIKE '%relationship%'"); + expect(sql).not.toContain("summary::text ~"); + expect(sql).not.toContain("message::text ~"); + }); + + it("uses OR ILIKE prefilters for grep BRE alternation patterns", async () => { + const api = mockApi([]); + const opts = buildGrepSearchOptions({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/"); + await searchDeeplakeTables(api, "m", "s", opts); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'book novel literature') DESC"); + expect(sql).toContain("message::text"); + expect(sql).toContain("ILIKE '%book%'"); + expect(sql).toContain("ILIKE '%novel%'"); + expect(sql).toContain("ILIKE '%literature%'"); + expect(sql).not.toContain("ILIKE '%book|novel|literature%'"); + }); + + it("keeps unsupported bracketed regex patterns out of SQL pushdown", async () => { + const api = mockApi([]); + await searchDeeplakeTables(api, "m", "s", { + pathFilter: " AND path = '/index.md'", + contentScanOnly: true, + likeOp: "LIKE", + escapedPattern: "^- [conv_0_session_.*\\]", + regexPattern: "^- [conv_0_session_.*\\]", + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("path = '/index.md'"); + expect(sql).not.toContain("summary::text ~"); + expect(sql).not.toContain("message::text ~"); + }); + + it("falls back to OR LIKE prefilters when regex SQL is rejected", async () => { + const api = { + query: vi.fn() + .mockRejectedValueOnce(new Error("regex operator not supported")) + .mockResolvedValueOnce([]), + } as any; + await searchDeeplakeTables(api, "m", "s", { + 
pathFilter: "", + contentScanOnly: true, + likeOp: "LIKE", + escapedPattern: "relationship|partner|married", + regexPattern: "relationship|partner|married", + prefilterPatterns: ["relationship", "partner", "married"], + }); + expect(api.query).toHaveBeenCalledTimes(2); + const fallbackSql = api.query.mock.calls[1][0] as string; + expect(fallbackSql).toContain("summary::text LIKE '%relationship%'"); + expect(fallbackSql).toContain("summary::text LIKE '%partner%'"); + expect(fallbackSql).toContain("summary::text LIKE '%married%'"); + expect(fallbackSql).not.toContain("relationship|partner|married"); + }); + + it("falls back to summary ILIKE when BM25 query is rejected", async () => { + const api = { + query: vi.fn() + .mockRejectedValueOnce(new Error("bm25 operator not supported")) + .mockResolvedValueOnce([]), + ensureSummaryBm25Index: vi.fn().mockResolvedValue(undefined), + } as any; + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + bm25QueryText: "book novel literature", + }); + expect(api.query).toHaveBeenCalledTimes(2); + const fallbackSql = api.query.mock.calls[1][0] as string; + expect(fallbackSql).toContain("summary::text ILIKE '%book%'"); + expect(fallbackSql).not.toContain("<#>"); }); it("concatenates rows from both tables into {path, content}", async () => { @@ -704,6 +829,7 @@ describe("searchDeeplakeTables", () => { const api = { query: vi.fn() .mockRejectedValueOnce(new Error("bad union")) + .mockRejectedValueOnce(new Error("bad union")), } as any; await expect(searchDeeplakeTables(api, "m", "s", { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", @@ -717,7 +843,25 @@ describe("searchDeeplakeTables", () => { pathFilter: "", contentScanOnly: false, likeOp: "LIKE", escapedPattern: "x", }); const sql = api.query.mock.calls[0][0] as string; - expect(sql).toContain("LIMIT 100"); + expect(sql).toContain("LIMIT 500"); + }); + + it("queries only the 
sessions table in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const api = { query: vi.fn().mockResolvedValue([]) } as any; + await searchDeeplakeTables(api, "m", "s", { + pathFilter: "", contentScanOnly: false, likeOp: "ILIKE", escapedPattern: "foo", + }); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).not.toContain('FROM "m"'); + expect(sql).toContain('FROM "s"'); + expect(sql).not.toContain("UNION ALL"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } }); }); @@ -754,7 +898,7 @@ describe("grepBothTables", () => { expect(out.length).toBe(1); }); - it("normalizes session JSON before refinement (turn-array sessions)", async () => { + it("greps against canonical raw JSON for transcript sessions", async () => { const sessionContent = JSON.stringify({ turns: [ { dia_id: "D1:1", speaker: "Alice", text: "project foo update" }, @@ -766,8 +910,7 @@ describe("grepBothTables", () => { .mockResolvedValueOnce([{ path: "/sessions/alice/chat_1.json", content: sessionContent }]), } as any; const out = await grepBothTables(api, "m", "s", baseParams, "/"); - // Only the matching turn is returned, not the whole JSON blob - expect(out.some(l => l.includes("[D1:1] Alice: project foo update"))).toBe(true); + expect(out.some(l => l.includes('"text": "project foo update"'))).toBe(true); expect(out.some(l => l.includes("unrelated"))).toBe(false); }); @@ -783,7 +926,8 @@ describe("grepBothTables", () => { const api = mockApi([{ path: "/a", content: "foo middle bar" }]); await grepBothTables(api, "m", "s", { ...baseParams, pattern: "foo.*bar" }, "/"); const [sql] = api.query.mock.calls.map((c: unknown[]) => c[0] as string); - expect(sql).toContain("summary::text LIKE '%foo%'"); + expect(sql).toContain("ORDER BY (summary <#> 'foo') DESC"); + expect(sql).toContain("message::text LIKE 
'%foo%'"); }); it("routes to ILIKE when ignoreCase is set", async () => { @@ -842,7 +986,9 @@ describe("regex literal prefilter", () => { expect(opts.contentScanOnly).toBe(true); expect(opts.likeOp).toBe("ILIKE"); + expect(opts.regexPattern).toBe("foo.*bar"); expect(opts.prefilterPattern).toBe("foo"); + expect(opts.bm25QueryText).toBe("foo"); expect(opts.pathFilter).toContain("/summaries"); }); @@ -865,14 +1011,44 @@ describe("regex literal prefilter", () => { }, "/summaries"); expect(opts.contentScanOnly).toBe(true); + expect(opts.regexPattern).toBe("relationship|partner|married"); expect(opts.prefilterPatterns).toEqual(["relationship", "partner", "married"]); + expect(opts.bm25QueryText).toBe("relationship partner married"); + }); + + it("unwraps simple grouping around alternations", () => { + expect(extractRegexAlternationPrefilters("(foo|bar)")).toEqual(["foo", "bar"]); + expect(extractRegexAlternationPrefilters("(?:foo|bar)")).toEqual(["foo", "bar"]); + }); + + it("normalizes grep BRE alternation before building search options", () => { + expect(normalizeGrepRegexPattern("book\\|novel\\|literature")).toBe("book|novel|literature"); + + const opts = buildGrepSearchOptions({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, "/summaries"); + + expect(opts.contentScanOnly).toBe(true); + expect(opts.regexPattern).toBe("book|novel|literature"); + expect(opts.prefilterPatterns).toEqual(["book", "novel", "literature"]); + expect(opts.bm25QueryText).toBe("book novel literature"); }); it("rejects alternation prefilters when grouping makes them unsafe", () => { - expect(extractRegexAlternationPrefilters("(foo|bar)")).toBeNull(); expect(extractRegexAlternationPrefilters("foo|bar.*baz")).toEqual(["foo", "bar"]); }); + it("extracts literals through word-boundary escapes", () => { + 
expect(extractRegexLiteralPrefilter("\\bcountry\\b")).toBe("country"); + }); + it("preserves escaped alternation characters inside a literal branch", () => { expect(extractRegexAlternationPrefilters("foo\\|bar|baz")).toEqual(["foo|bar", "baz"]); expect(extractRegexAlternationPrefilters("foo|bar\\.md")).toEqual(["foo", "bar.md"]); @@ -891,7 +1067,215 @@ describe("regex literal prefilter", () => { }, "/summaries/alice/s1.md"); expect(opts.contentScanOnly).toBe(false); + expect(opts.regexPattern).toBeUndefined(); expect(opts.prefilterPattern).toBeUndefined(); + expect(opts.bm25QueryText).toBe("foo bar"); expect(opts.pathFilter).toBe(" AND path = '/summaries/alice/s1.md'"); }); + + it("builds BM25 query text from regex literals conservatively", () => { + expect(buildSummaryBm25QueryText("home country", false, null, null)).toBe("home country"); + expect(buildSummaryBm25QueryText("book|novel|literature", false, null, ["book", "novel", "literature"])).toBe("book novel literature"); + expect(buildSummaryBm25QueryText(".*", false, null, null)).toBeNull(); + }); + + it("builds SQL-safe regex patterns conservatively", () => { + expect(toSqlRegexPattern("foo.*bar", false)).toBe("foo.*bar"); + expect(toSqlRegexPattern("foo.*bar", true)).toBeNull(); + expect(toSqlRegexPattern("^- [conv_0_session_.*\\]", false)).toBe("\\^- \\[conv_0_session_\\.\\*\\\\\\]"); + expect(toSqlRegexPattern("\\bitem\\d+", false)).toBe("\\yitem[[:digit:]]+"); + expect(toSqlRegexPattern("foo(?=bar)", false)).toBeNull(); + }); + + it("compiles grep BRE alternation as real alternation", () => { + const re = compileGrepRegex({ + pattern: "book\\|novel\\|literature", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }); + + expect(re.test("She loves literature")).toBe(true); + expect(re.test("A novel inspired her")).toBe(true); + expect(re.test("No match here")).toBe(false); + }); + + describe("benchmark parity", 
() => { + function mockQueryRows(rows: { path: string; content: string }[]) { + return { query: vi.fn().mockResolvedValueOnce(rows) } as any; + } + + async function runParityCase( + rows: { path: string; content: string }[], + params: { + pattern: string; + ignoreCase: boolean; + wordMatch: boolean; + filesOnly: boolean; + countOnly: boolean; + lineNumber: boolean; + invertMatch: boolean; + fixedString: boolean; + }, + targetPath = "/", + ) { + const api = mockQueryRows(rows); + const remote = await grepBothTables(api, "memory", "sessions", params, targetPath); + const local = refineGrepMatches( + rows.map((row) => ({ path: row.path, content: normalizeContent(row.path, row.content) })), + params, + ); + return { api, remote, local }; + } + + it("matches local grep for LoCoMo-style relationship status lookups", async () => { + const rows = [ + { + path: "/summaries/locomo/conv_0_session_3_summary.md", + content: "## Searchable Facts\n- Relationship status: Single.\n- Caroline is researching adoption agencies.\n", + }, + { + path: "/sessions/conv_0_session_3.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D1:1", speaker: "Caroline", text: "I'm single and still planning to adopt on my own." }, + { dia_id: "D1:2", speaker: "Melanie", text: "That sounds like a good plan." 
}, + ], + }), + }, + ]; + + const params = { + pattern: "single", + ignoreCase: true, + wordMatch: true, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/summaries"); + expect(remote).toEqual(local); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ORDER BY (summary <#> 'single') DESC"); + expect(sql).toContain("message::text ILIKE '%single%'"); + }); + + it("matches local grep for LoCoMo-style title and reading mentions", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_6.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D6:1", speaker: "Melanie", text: "Charlotte's Web was my favorite book as a kid." }, + { dia_id: "D6:2", speaker: "Caroline", text: "I can recommend another book if you want." }, + ], + }), + }, + ]; + + const params = { + pattern: "Charlotte\\|book", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + expect(remote.some((line) => line.includes("Charlotte's Web"))).toBe(true); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("ILIKE '%Charlotte%'"); + expect(sql).toContain("ILIKE '%book%'"); + }); + + it("matches local grep for LoCoMo-style relative-time phrases", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_12.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D12:1", speaker: "Caroline", text: "A friend made it for my 18th birthday ten years ago." }, + { dia_id: "D12:2", speaker: "Melanie", text: "That's really thoughtful." 
}, + ], + }), + }, + ]; + + const params = { + pattern: "ten years ago", + ignoreCase: true, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).toContain("message::text ILIKE '%ten years ago%'"); + }); + + it("avoids SQL regex pushdown for bracket-anchored patterns and matches the raw-json local output", async () => { + const rows = [ + { + path: "/sessions/conv_0_session_2.json", + content: JSON.stringify({ + turns: [ + { dia_id: "D2:1", speaker: "Melanie", text: "We're thinking about going camping next month." }, + { dia_id: "D2:2", speaker: "Caroline", text: "That should be fun." }, + ], + }), + }, + ]; + + const params = { + pattern: "^\\[D2:1\\]", + ignoreCase: false, + wordMatch: false, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }; + + const { api, remote, local } = await runParityCase(rows, params, "/sessions"); + expect(remote).toEqual(local); + expect(remote).toEqual([]); + const sql = api.query.mock.calls[0][0] as string; + expect(sql).not.toContain("summary::text ~"); + expect(sql).not.toContain("message::text ~"); + }); + }); + + it("compiles word-boundary alternation with grouping", () => { + const re = compileGrepRegex({ + pattern: "book\\|novel", + ignoreCase: true, + wordMatch: true, + filesOnly: false, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }); + + expect(re.test("book club")).toBe(true); + expect(re.test("graphic novel")).toBe(true); + expect(re.test("storybook")).toBe(false); + }); }); diff --git a/claude-code/tests/grep-direct.test.ts b/claude-code/tests/grep-direct.test.ts index 0f56c9a..9e5e3fd 100644 --- a/claude-code/tests/grep-direct.test.ts +++ 
b/claude-code/tests/grep-direct.test.ts @@ -3,7 +3,7 @@ import { parseBashGrep, handleGrepDirect, type GrepParams } from "../../src/hook describe("handleGrepDirect", () => { const baseParams: GrepParams = { - pattern: "foo", targetPath: "/", + pattern: "foo", targetPath: "/", recursive: false, ignoreCase: false, wordMatch: false, filesOnly: false, countOnly: false, lineNumber: false, invertMatch: false, fixedString: false, }; @@ -188,7 +188,7 @@ describe("parseBashGrep", () => { it("parses combined flags -ri", () => { const r = parseBashGrep("grep -ri 'pattern' /dir"); expect(r!.ignoreCase).toBe(true); - // -r is no-op (recursive implied) + expect(r!.recursive).toBe(true); }); it("parses combined flags -wni", () => { @@ -201,6 +201,7 @@ describe("parseBashGrep", () => { it("parses -rl flags", () => { const r = parseBashGrep("grep -rl 'pattern' /dir"); expect(r!.filesOnly).toBe(true); + expect(r!.recursive).toBe(true); }); // ── Variants ── diff --git a/claude-code/tests/grep-interceptor.test.ts b/claude-code/tests/grep-interceptor.test.ts index ba7e67b..83c44bd 100644 --- a/claude-code/tests/grep-interceptor.test.ts +++ b/claude-code/tests/grep-interceptor.test.ts @@ -23,12 +23,12 @@ function makeCtx(fs: DeeplakeFs, cwd = "/memory") { // ── Tests ───────────────────────────────────────────────────────────────────── // -// The interceptor now queries both `memory` and `sessions` in parallel with -// LIKE/ILIKE (no more BM25 — the `<#>` query returned 400 on every call), -// and each SQL row returns { path, content } so we no longer need a -// prefetch round-trip to read file content for the regex pass. Prefetch is -// only used as a fallback when SQL returns zero rows and we scan the FS -// cache. Tests below assert that new contract. +// The interceptor now queries both `memory` and `sessions` through grep-core. 
+// Summary retrieval may use BM25 (`<#>`) while sessions keep LIKE/ILIKE +// filtering, and each SQL row returns { path, content } so we no longer need +// a prefetch round-trip to read file content for the regex pass. Prefetch is +// only used as a fallback when SQL returns zero rows and we scan the FS cache. +// Tests below assert that contract. describe("grep interceptor", () => { it("returns exitCode=1 when the pattern is missing", async () => { @@ -78,8 +78,7 @@ describe("grep interceptor", () => { const sqls = client.query.mock.calls.map((c: unknown[]) => c[0] as string); expect(sqls.some(s => /FROM "test"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); expect(sqls.some(s => /FROM "sessions"/.test(s) && /ILIKE|LIKE/.test(s))).toBe(true); - // No BM25 in the new path - expect(sqls.some(s => s.includes("<#>"))).toBe(false); + expect(sqls.some(s => s.includes("<#>"))).toBe(true); expect(result.stdout).toContain("hello world"); expect(result.exitCode).toBe(0); }); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index 10c4595..4e576c4 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -254,6 +254,8 @@ describe("claude pre-tool source", () => { expect(touchesMemory("cat ~/.deeplake/memory/index.md")).toBe(true); expect(rewritePaths("cat ~/.deeplake/memory/index.md")).toBe("cat /index.md"); expect(isSafe("cat /index.md | head -20")).toBe(true); + expect(isSafe("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10")).toBe(true); + expect(isSafe("for file in /sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|birthday\\|born\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\"")).toBe(false); expect(isSafe("python3 -c 'print(1)' /index.md")).toBe(false); }); @@ -302,6 +304,38 @@ describe("claude pre-tool source", () => { expect(passthrough).toBeNull(); }); + it("keeps 
benchmark-style find -exec grep pipelines on the compiled path and rejects shell-loop variants", async () => { + const compiled = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "find ~/.deeplake/memory/sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10", + }, + tool_use_id: "tu-bm-1", + }, { + config: baseConfig, + executeCompiledBashCommandFn: vi.fn(async (_api, _table, _sessions, cmd) => { + expect(cmd).toBe("find /sessions -name '*.json' -exec grep -l 'Melanie' {} \\; 2>/dev/null | head -10"); + return "/sessions/conv_0_session_2.json"; + }) as any, + }); + expect(compiled?.command).toContain("/sessions/conv_0_session_2.json"); + expect(compiled?.description).toContain("DeepLake compiled"); + + const guidance = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "for file in ~/.deeplake/memory/sessions/conv_0_session_*.json; do echo \"=== $(basename $file) ===\"; grep -i \"age\\|birthday\\|born\" \"$file\" 2>/dev/null | head -3; done | grep -B 1 -i \"age\\|birthday\\|born\"", + }, + tool_use_id: "tu-bm-2", + }, { + config: baseConfig, + }); + expect(guidance?.command).toContain("RETRY REQUIRED"); + expect(guidance?.description).toContain("unsupported command"); + }); + it("uses direct grep, direct reads, listings, finds, and shell fallback", async () => { const grepDecision = await processPreToolUse({ session_id: "s1", @@ -595,6 +629,25 @@ describe("claude session start source", () => { expect(context).not.toContain("Hivemind v"); }); + it("switches to sessions-only guidance when the env flag is set", () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const context = buildSessionStartAdditionalContext({ + authCommand: "/tmp/auth-login.js", + creds: baseCreds, + currentVersion: null, + latestVersion: null, + }); + expect(context).toContain("SESSIONS-ONLY mode"); + 
expect(context).toContain("do NOT start with index.md or summaries"); + expect(context).not.toContain("Always read index.md first"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } + }); + it("logs authenticated startup without backfilling when the username is already present", async () => { const logFn = vi.fn(); const save = vi.fn(); diff --git a/claude-code/tests/sessions-table.test.ts b/claude-code/tests/sessions-table.test.ts index 40a254f..c1d0553 100644 --- a/claude-code/tests/sessions-table.test.ts +++ b/claude-code/tests/sessions-table.test.ts @@ -77,6 +77,32 @@ function makeClient(memoryRows: Row[] = [], sessionRows: Row[] = []) { // ── Tests ─────────────────────────────────────────────────────────────────── describe("DeeplakeFs — sessions table multi-row read", () => { + it("reads transcript-shaped session files as canonical pretty JSON", async () => { + const transcript = { + conversation_id: 0, + session_number: 6, + turns: [ + { dia_id: "D6:1", speaker: "Melanie", text: "Charlotte's Web was my favorite book as a kid." }, + { dia_id: "D6:2", speaker: "Caroline", text: "I can recommend another book if you want." 
}, + ], + }; + const sessionRows: Row[] = [ + { + path: "/sessions/conv_0_session_6.json", + text_content: JSON.stringify(transcript), + size_bytes: 128, + mime_type: "application/json", + creation_date: "2026-01-01T00:00:01Z", + }, + ]; + + const client = makeClient([], sessionRows); + const fs = await DeeplakeFs.create(client as never, "memory", "/", "sessions"); + + const content = await fs.readFile("/sessions/conv_0_session_6.json"); + expect(content).toBe(`${JSON.stringify(transcript, null, 2)}\n`); + }); + it("reads session file by normalizing rows ordered by creation_date", async () => { const sessionRows: Row[] = [ { path: "/sessions/alice/alice_org_default_s1.jsonl", text_content: '{"type":"user_message","content":"hello"}', size_bytes: 40, mime_type: "application/json", creation_date: "2026-01-01T00:00:01Z" }, diff --git a/claude-code/tests/virtual-table-query.test.ts b/claude-code/tests/virtual-table-query.test.ts index bcace78..6ca34b7 100644 --- a/claude-code/tests/virtual-table-query.test.ts +++ b/claude-code/tests/virtual-table-query.test.ts @@ -16,10 +16,23 @@ describe("virtual-table-query", () => { project: "repo", description: "session summary", creation_date: "2026-01-01T00:00:00.000Z", + summary: `# Session s1 +- **Source**: /sessions/a/s1.jsonl +- **Date**: 2026-01-01 +- **Participants**: Alice, Bob +- **Topics**: auth, retries + +## Searchable Facts +- Auth tokens refresh automatically. 
+`, }, ]); expect(content).toContain("# Memory Index"); - expect(content).toContain("/summaries/alice/s1.md"); + expect(content).toContain("## People"); + expect(content).toContain("## Summary To Session Catalog"); + expect(content).toContain("s1.md"); + expect(content).toContain("Alice, Bob"); + expect(content).toContain("[session](/sessions/a/s1.jsonl)"); }); it("builds index rows when project metadata is missing", () => { @@ -28,7 +41,7 @@ describe("virtual-table-query", () => { path: "/summaries/alice/s2.md", }, ]); - expect(content).toContain("/summaries/alice/s2.md"); + expect(content).toContain("s2.md"); expect(content).toContain("# Memory Index"); }); @@ -54,17 +67,36 @@ describe("virtual-table-query", () => { expect(api.query).not.toHaveBeenCalled(); }); - it("normalizes session rows for exact path reads", async () => { + it("pretty-prints transcript session rows for exact path reads", async () => { const api = { query: vi.fn().mockResolvedValueOnce([ - { path: "/sessions/a.jsonl", content: "{\"type\":\"user_message\",\"content\":\"hello\"}", source_order: 1 }, - { path: "/sessions/a.jsonl", content: "{\"type\":\"assistant_message\",\"content\":\"hi\"}", source_order: 1 }, + { + path: "/sessions/a.json", + content: "{\"conversation_id\":0,\"session_number\":1,\"turns\":[{\"speaker\":\"Caroline\",\"text\":\"hello\"},{\"speaker\":\"Melanie\",\"text\":\"hi\"}]}", + source_order: 1, + }, ]), } as any; - const content = await readVirtualPathContent(api, "memory", "sessions", "/sessions/a.jsonl"); - - expect(content).toBe("[user] hello\n[assistant] hi"); + const content = await readVirtualPathContent(api, "memory", "sessions", "/sessions/a.json"); + + expect(content).toBe([ + "{", + " \"conversation_id\": 0,", + " \"session_number\": 1,", + " \"turns\": [", + " {", + " \"speaker\": \"Caroline\",", + " \"text\": \"hello\"", + " },", + " {", + " \"speaker\": \"Melanie\",", + " \"text\": \"hi\"", + " }", + " ]", + "}", + "", + ].join("\n")); }); it("reads multiple 
exact paths in a single query and synthesizes /index.md when needed", async () => { @@ -79,6 +111,11 @@ describe("virtual-table-query", () => { project: "repo", description: "session summary", creation_date: "2026-01-01T00:00:00.000Z", + summary: `# Session s1 +- **Source**: /sessions/a/s1.jsonl +- **Date**: 2026-01-01 +- **Participants**: Alice, Bob +`, }, ]), } as any; @@ -90,6 +127,66 @@ describe("virtual-table-query", () => { expect(api.query).toHaveBeenCalledTimes(2); }); + it("skips memory and does not synthesize /index.md in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { + path: "/sessions/a.json", + content: "{\"conversation_id\":0,\"turns\":[{\"speaker\":\"Caroline\",\"text\":\"hello\"}]}", + source_order: 1, + creation_date: "", + }, + ]), + } as any; + + const content = await readVirtualPathContents(api, "memory", "sessions", ["/sessions/a.json", "/index.md"]); + + expect(content.get("/sessions/a.json")).toBe([ + "{", + " \"conversation_id\": 0,", + " \"turns\": [", + " {", + " \"speaker\": \"Caroline\",", + " \"text\": \"hello\"", + " }", + " ]", + "}", + "", + ].join("\n")); + expect(content.get("/index.md")).toBeNull(); + expect(api.query).toHaveBeenCalledTimes(1); + expect(String(api.query.mock.calls[0]?.[0])).not.toContain('FROM "memory"'); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } + }); + + it("does not synthesize /index.md when index is disabled but still reads summaries", async () => { + const prev = process.env.HIVEMIND_DISABLE_INDEX; + process.env.HIVEMIND_DISABLE_INDEX = "1"; + try { + const api = { + query: vi.fn().mockResolvedValueOnce([ + { path: "/summaries/a.md", content: "summary body", source_order: 0 }, + ]), + } as any; + + const content = await readVirtualPathContents(api, "memory", 
"sessions", ["/summaries/a.md", "/index.md"]); + + expect(content.get("/summaries/a.md")).toBe("summary body"); + expect(content.get("/index.md")).toBeNull(); + expect(api.query).toHaveBeenCalledTimes(1); + expect(String(api.query.mock.calls[0]?.[0])).not.toContain("'/index.md'"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_DISABLE_INDEX; + else process.env.HIVEMIND_DISABLE_INDEX = prev; + } + }); + it("ignores invalid exact-read rows before merging content", async () => { const api = { query: vi.fn().mockResolvedValueOnce([ diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index a31916a..8d1aad6 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -290,6 +290,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -401,7 +424,28 @@ var DeeplakeApi = class { } }; +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -564,24 +608,9 @@ function normalizeContent(path, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t) => { - const sp = String(t?.speaker ?? t?.name ?? 
"?").trim(); - const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t) => { const i = t.indexOf(""); @@ -625,14 +654,34 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); + const sessionsOnly = isSessionsOnlyMode(); + const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? 
"", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -653,6 +702,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -679,13 +732,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -713,33 +767,177 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? 
sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function toSqlRegexPattern(pattern, ignoreCase) { + if (!pattern) + return null; + if (ignoreCase) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function isSqlRegexPushdownSafe(pattern) { + return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return 
null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + if (!isSqlRegexPushdownSafe(pattern)) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ~ '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -773,12 +971,12 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { } return output; } -async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) { +async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, forceMultiFilePrefix) { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); const seen = /* @__PURE__ */ new Set(); const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } // dist/src/hooks/grep-direct.js @@ -864,7 +1062,7 @@ function parseBashGrep(cmd) { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns = []; let ti = 1; while (ti < tokens.length) { @@ -956,6 +1154,8 @@ function parseBashGrep(cmd) { break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -997,6 +1197,7 @@ function parseBashGrep(cmd) { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, filesOnly, @@ -1019,25 +1220,240 @@ async function handleGrepDirect(api, table, sessionsTable, params) { invertMatch: params.invertMatch, fixedString: params.fixedString }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables(api, table, 
sessionsTable, matchParams, params.targetPath, params.recursive ? true : void 0); return output.join("\n") || "(no matches)"; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename(path) { + const trimmed = path.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? 
extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts = new Date(parsed); + const yyyy = ts.getUTCFullYear(); + const mm = String(ts.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts.getUTCDate()).padStart(2, "0"); + const hh = String(ts.getUTCHours()).padStart(2, "0"); + const min = String(ts.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path = typeof row.path === "string" ? row.path : ""; + if (!path) + return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? 
row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof row.label 
=== "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + // dist/src/hooks/virtual-table-query.js function normalizeSessionPart(path, content) { return normalizeContent(path, content); } function buildVirtualIndexContent(rows) { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry) { + const session 
= entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} function buildUnionQuery(memoryQuery, sessionsQuery) { return `SELECT path, content, size_bytes, creation_date, source_order FROM ((${memoryQuery}) UNION ALL (${sessionsQuery})) AS combined ORDER BY path, source_order, creation_date`; } @@ -1052,6 +1468,9 @@ function buildDirFilter(dirs) { return ` WHERE ${clauses.join(" OR ")}`; } async function queryUnionRows(api, memoryQuery, sessionsQuery) { + if (isSessionsOnlyMode()) { + return api.query(`SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date`); + } const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -1068,7 +1487,13 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP const result = new Map(uniquePaths.map((path) => [path, null])); if (uniquePaths.length === 0) return result; - const inList = buildInList(uniquePaths); + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } + const queryPaths = isIndexDisabled() ? 
uniquePaths.filter((path) => path !== "/index.md") : uniquePaths; + if (queryPaths.length === 0) + return result; + const inList = buildInList(queryPaths); const rows = await queryUnionRows(api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, `SELECT path, message::text AS content, NULL::bigint AS size_bytes, COALESCE(creation_date::text, '') AS creation_date, 1 AS source_order FROM "${sessionsTable}" WHERE path IN (${inList})`); const memoryHits = /* @__PURE__ */ new Map(); const sessionHits = /* @__PURE__ */ new Map(); @@ -1086,7 +1511,7 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP sessionHits.set(path, current); } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? null); continue; @@ -1096,8 +1521,8 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP result.set(path, sessionParts.join("\n")); } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { - const rows2 = await api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []); + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + const rows2 = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`).catch(() => []); result.set("/index.md", buildVirtualIndexContent(rows2)); } return result; @@ -1159,11 +1584,19 @@ function splitTopLevel(input, operators) { const parts = []; let current = ""; let quote = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { 
+ current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === '"') + escaped = true; current += ch; continue; } @@ -1172,6 +1605,11 @@ function splitTopLevel(input, operators) { current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { const trimmed2 = current.trim(); @@ -1183,7 +1621,7 @@ function splitTopLevel(input, operators) { } current += ch; } - if (quote) + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) @@ -1247,8 +1685,8 @@ function expandBraceToken(token) { return variants.flatMap((variant) => expandBraceToken(`${prefix}${variant}${suffix}`)); } function stripAllowedModifiers(segment) { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); - const clean = segment.replace(/\s2>\/dev\/null\s*$/g, "").replace(/\s2>&1\s*/g, " ").trim(); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); + const clean = segment.replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "").replace(/\s2>&1(?=\s*(?:\||$))/g, "").trim(); return { clean, ignoreMissing }; } function hasUnsupportedRedirection(segment) { @@ -1317,7 +1755,7 @@ function isValidPipelineHeadTailStage(stage) { return tokens[1] === "-n" && /^-?\d+$/.test(tokens[2]); return false; } -function parseFindNamePatterns(tokens) { +function parseFindSpec(tokens) { const patterns = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -1335,9 +1773,22 @@ function parseFindNamePatterns(tokens) { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) + return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if (terminator !== "\\;" && terminator !== ";" || target !== "{}") + return null; + return { + patterns, + 
execGrepCmd: execTokens.slice(0, -1).join(" ") + }; + } return null; } - return patterns.length > 0 ? patterns : null; + return patterns.length > 0 ? { patterns, execGrepCmd: null } : null; } function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); @@ -1424,15 +1875,32 @@ function parseCompiledSegment(segment) { const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams2 = parseBashGrep(execGrepCmd); + if (!grepParams2) + return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams2, lineLimit }; + } if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") @@ -1618,20 +2086,35 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs"; +import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, statSync, writeFileSync as writeFileSync2 } from "node:fs"; import { join as join4 } from "node:path"; import { homedir as homedir3 } from "node:os"; var log3 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", 
"query-cache"); var INDEX_CACHE_FILE = "index.md"; +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join4(cacheRoot, sessionId); } +function clearSessionQueryCache(sessionId, deps = {}) { + const { logFn = log3 } = deps; + try { + rmSync(getSessionQueryCacheDir(sessionId, deps), { recursive: true, force: true }); + } catch (e) { + logFn(`clear failed for session=${sessionId}: ${e.message}`); + } +} function readCachedIndexContent(sessionId, deps = {}) { const { logFn = log3 } = deps; try { - return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if (Date.now() - stats.mtimeMs > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync3(cachePath, "utf-8"); } catch (e) { if (e?.code === "ENOENT") return null; @@ -1763,11 +2246,66 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "case", "esac" ]); +function splitSafeStages(cmd) { + const stages = []; + let current = ""; + let quote = null; + let escaped = false; + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === '"') { + escaped = true; + } + continue; + } + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + current += ch; + continue; + } + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) + stages.push(current.trim()); + current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) + stages.push(current.trim()); + current = 
""; + continue; + } + current += ch; + } + if (quote || escaped) + return null; + if (current.trim()) + stages.push(current.trim()); + return stages; +} function isSafe(cmd) { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, "''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) + return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? ""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) @@ -1802,17 +2340,6 @@ function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { return ""; } } -function buildIndexContent(rows) { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"]; - const project = row["project"] || ""; - const description = (row["description"] || "").slice(0, 120); - const date = (row["creation_date"] || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); - } - return lines.join("\n"); -} async function processCodexPreToolUse(input, deps = {}) { const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input?.command ?? 
""; @@ -1836,7 +2363,7 @@ async function processCodexPreToolUse(input, deps = {}) { const readVirtualPathContentsWithCache = async (cachePaths) => { const uniquePaths = [...new Set(cachePaths)]; const result2 = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; const remainingPaths = cachedIndex === null ? uniquePaths : uniquePaths.filter((path) => path !== "/index.md"); if (cachedIndex !== null) { result2.set("/index.md", cachedIndex); @@ -1907,13 +2434,13 @@ async function processCodexPreToolUse(input, deps = {}) { } if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" ? readCachedIndexContentFn(input.session_id) : null; + let content = !isIndexDisabled() && virtualPath === "/index.md" ? 
readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - content = buildIndexContent(idxRows); + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { + const idxRows = await api.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC`); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { if (virtualPath === "/index.md") { diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 5872059..84cf810 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66987,6 +66987,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e6) { + if (isDuplicateIndexError(e6)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e6; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -67099,10 +67122,31 @@ var DeeplakeApi = class { }; // dist/src/shell/deeplake-fs.js -import { basename as basename4, posix } from "node:path"; +import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isSummaryBm25Disabled() { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/shell/grep-core.js +var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
500); +function escapeRegexLiteral(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function normalizeGrepRegexPattern(pattern) { + return pattern.replace(/\\([|(){}+?])/g, "$1").replace(/\\/g, "\\b"); +} var TOOL_INPUT_FIELDS = [ "command", "file_path", @@ -67265,24 +67309,9 @@ function normalizeContent(path2, raw) { } catch { return raw; } - if (Array.isArray(obj.turns)) { - const header = []; - if (obj.date_time) - header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s10 = obj.speakers; - const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", "); - if (names) - header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t6) => { - const sp = String(t6?.speaker ?? t6?.name ?? "?").trim(); - const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t6?.dia_id ? `[${t6.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out2 = [...header, ...lines].join("\n"); - return out2.trim() ? out2 : raw; + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)} +`; } const stripRecalled = (t6) => { const i11 = t6.indexOf(""); @@ -67326,14 +67355,34 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; - const rows = await api.query(`SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); + const sessionsOnly = isSessionsOnlyMode(); + const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; + const ensureSummaryBm25Index = api.ensureSummaryBm25Index; + if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { + }); + } + const buildCombinedQuery = (memFilter, sessFilter, useBm25Summary = false) => { + const memQuery = useBm25Summary ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? 
"", limit) : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); + const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67364,6 +67413,10 @@ function extractRegexLiteralPrefilter(pattern) { const next = pattern[i11 + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i11++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; @@ -67390,13 +67443,14 @@ function extractRegexLiteralPrefilter(pattern) { return literal.length >= 2 ? 
literal : null; } function extractRegexAlternationPrefilters(pattern) { - if (!pattern.includes("|")) + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts = []; let current = ""; let escaped = false; - for (let i11 = 0; i11 < pattern.length; i11++) { - const ch = pattern[i11]; + for (let i11 = 0; i11 < unwrapped.length; i11++) { + const ch = unwrapped[i11]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -67424,33 +67478,177 @@ function extractRegexAlternationPrefilters(pattern) { return literals.length > 0 ? literals : null; } function buildGrepSearchOptions(params, targetPath) { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? 
sqlLike(literalPrefilter) : void 0, - prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)) + prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + bm25QueryText: bm25QueryText ?? void 0, + limit: DEFAULT_GREP_CANDIDATE_LIMIT }; } +function buildSummaryBm25QueryText(pattern, fixedString, literalPrefilter, alternationPrefilters) { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 ? alternationPrefilters : literalPrefilter ? [literalPrefilter] : [pattern]; + const cleaned = [...new Set(rawTokens.flatMap((token) => token.replace(/\\b/g, " ").replace(/[.*+?^${}()[\]{}|\\]/g, " ").split(/\s+/)).map((token) => token.trim()).filter((token) => token.length >= 2))]; + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} function buildContentFilter(column, likeOp, patterns) { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} +function buildRegexFilter(column, pattern, ignoreCase) { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} +function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} +function toSqlRegexPattern(pattern, ignoreCase) { + if (!pattern) + return null; + if (ignoreCase) + return null; + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} +function isSqlRegexPushdownSafe(pattern) { + return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); +} +function unwrapWholeRegexGroup(pattern) { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) + return pattern; + let depth = 0; + let escaped = false; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") + depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i11 !== pattern.length - 1) + return pattern; + } + } + if (depth !== 0) + return pattern; + if (pattern.startsWith("(?:")) + return pattern.slice(3, -1); + return pattern.slice(1, -1); +} +function translateRegexPatternToSql(pattern) { + let out = ""; + for (let i11 = 0; i11 < pattern.length; i11++) { + const ch = pattern[i11]; + if (ch === "\\") { + const next = pattern[i11 + 1]; + if (!next) + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i11++; + switch (next) { + case "d": + out += "[[:digit:]]"; + continue; + case "D": + out += "[^[:digit:]]"; + continue; + case "s": + out += "[[:space:]]"; + continue; + case "S": + out += "[^[:space:]]"; + continue; + case "w": + out += "[[:alnum:]_]"; + continue; + case "W": + out += "[^[:alnum:]_]"; + continue; + case "b": + out += "\\y"; + continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case 
"p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + if (ch === "(" && pattern.startsWith("(?:", i11)) { + out += "("; + i11 += 2; + continue; + } + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i11))) { + const named = pattern.slice(i11).match(/^\(\?<[^>]+>/); + if (!named) + return null; + out += "("; + i11 += named[0].length - 1; + continue; + } + if (ch === "(" && pattern[i11 + 1] === "?") + return null; + out += ch; + } + return out; +} +function buildContentPredicate(column, likeOp, patterns) { if (patterns.length === 0) return ""; if (patterns.length === 1) - return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} +function buildRegexPredicate(column, pattern, ignoreCase) { + if (!pattern) + return ""; + if (!isSqlRegexPushdownSafe(pattern)) + return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) + return ""; + return `${column} ~ '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { - let reStr = params.fixedString ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : params.pattern; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + let reStr = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; if (params.wordMatch) - reStr = `\\b${reStr}\\b`; + reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { - return new RegExp(params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), params.ignoreCase ? "i" : ""); + return new RegExp(escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : ""); } } function refineGrepMatches(rows, params, forceMultiFilePrefix) { @@ -67485,6 +67683,234 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) { return output; } +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function basename4(path2) { + const trimmed = path2.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} +function extractSection(text, heading) { + const re9 = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function extractHeaderField(text, field) { + const re9 = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match2 = text.match(re9); + return match2 ? match2[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function splitMetadataList(value) { + if (!value) + return []; + return [...new Set(value.split(/\s*(?:,|;|&|\band\b)\s*/i).map((part) => compactText(part)).filter((part) => part.length >= 2 && !/^unknown$/i.test(part)))]; +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryDate(text) { + return extractHeaderField(text, "Date") ?? extractHeaderField(text, "Started"); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? 
extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function extractSummarySource(text) { + return extractHeaderField(text, "Source"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} +function truncate(value, max) { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}\u2026` : value; +} +function formatIndexTimestamp(value) { + if (!value) + return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) + return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) + return value; + const ts3 = new Date(parsed); + const yyyy = ts3.getUTCFullYear(); + const mm = String(ts3.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts3.getUTCDate()).padStart(2, "0"); + const hh = String(ts3.getUTCHours()).padStart(2, "0"); + const min = String(ts3.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} +function buildSummaryIndexEntry(row) { + const path2 = typeof row.path === "string" ? row.path : ""; + if (!path2) + return null; + if (path2.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path2)) + return null; + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? 
row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + const label = basename4(path2) || path2; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" ? structuredBlurb : truncate(description, 220); + return { + path: path2, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb + }; +} +function formatSummaryIndexEntry(entry) { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) + parts.push(`[session](${entry.source})`); + if (entry.date) + parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) + parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) + parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) + parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) + parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") + parts.push(truncate(entry.blurb, 220)); + return parts.join(" \u2014 "); +} +function buildSummaryIndexLine(row) { + const entry = "label" in row && typeof 
row.label === "string" ? row : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} + +// dist/src/hooks/virtual-table-query.js +function buildVirtualIndexContent(rows) { + const entries = rows.map((row) => buildSummaryIndexEntry(row)).filter((entry) => entry !== null).sort((a15, b26) => (b26.sortDate || "").localeCompare(a15.sortDate || "") || a15.path.localeCompare(b26.path)); + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "" + ]; + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) + lines.push(line); + } + return lines.join("\n"); +} +function formatEntryLink(entry) { + const session = entry.source ? 
` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} +function topList(counts, limit) { + return [...counts.entries()].sort((a15, b26) => b26[1] - a15[1] || a15[0].localeCompare(b26[0])).slice(0, limit).map(([value]) => value); +} +function buildPeopleDirectory(entries) { + const people = /* @__PURE__ */ new Map(); + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: /* @__PURE__ */ new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + return [...people.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} \u2014 ${info.count} summaries`]; + if (topics.length > 0) + parts.push(`topics: ${topics.join("; ")}`); + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} +function buildProjectDirectory(entries) { + const projects = /* @__PURE__ */ new Map(); + for (const entry of entries) { + if (!entry.project) + continue; + const current = projects.get(entry.project) ?? 
{ count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + return [...projects.entries()].sort((a15, b26) => b26[1].count - a15[1].count || a15[0].localeCompare(b26[0])).map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} \u2014 ${info.count} summaries`]; + if (recent) + parts.push(`recent: ${recent}`); + return parts.join(" \u2014 "); + }); +} + // dist/src/shell/deeplake-fs.js var BATCH_SIZE = 10; var PREFETCH_BATCH_SIZE = 50; @@ -67542,6 +67968,8 @@ var DeeplakeFs = class _DeeplakeFs { // Paths that live in the sessions table (multi-row, read by concatenation) sessionPaths = /* @__PURE__ */ new Set(); sessionsTable = null; + sessionsOnly = false; + indexDisabled = false; constructor(client, table, mountPoint) { this.client = client; this.table = table; @@ -67553,9 +67981,11 @@ var DeeplakeFs = class _DeeplakeFs { static async create(client, table, mount = "/memory", sessionsTable) { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; + fs3.sessionsOnly = isSessionsOnlyMode(); + fs3.indexDisabled = isIndexDisabled(); await client.ensureTable(); let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs3.sessionsOnly ? 
Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -67611,7 +68041,7 @@ var DeeplakeFs = class _DeeplakeFs { this.pending.delete(filePath); this.flushed.delete(filePath); const parent = parentOf(filePath); - this.dirs.get(parent)?.delete(basename4(filePath)); + this.dirs.get(parent)?.delete(basename5(filePath)); } // ── flush / write batching ──────────────────────────────────────────────── scheduleFlush() { @@ -67674,46 +68104,8 @@ var DeeplakeFs = class _DeeplakeFs { } // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { - const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByKey = /* @__PURE__ */ new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) - sessionPathsByKey.set(stem, sp.slice(1)); - } - } - const lines = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|" - ]; - for (const row of rows) { - const p22 = row["path"]; - const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match2) - continue; - const summaryUser = match2[1]; - const sessionId = match2[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? 
sessionPathsByKey.get(baseName); - const convLink = convPath ? `[messages](${convPath})` : ""; - const project = row["project"] || ""; - const description = row["description"] || ""; - const creationDate = row["creation_date"] || ""; - const lastUpdateDate = row["last_update_date"] || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + const rows = await this.client.query(`SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC`); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── /** @@ -67802,7 +68194,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); if (this.dirs.has(p22) && !this.files.has(p22)) throw fsErr("EISDIR", "illegal operation on a directory", p22); - if (p22 === "/index.md" && !this.files.has(p22)) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !this.files.has(p22)) { const realRows = await this.client.query(`SELECT summary FROM "${this.table}" WHERE path = '${sqlStr("/index.md")}' LIMIT 1`); if (realRows.length > 0 && realRows[0]["summary"]) { const text2 = realRows[0]["summary"]; @@ -67847,13 +68239,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? 
content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length, @@ -67872,13 +68264,13 @@ var DeeplakeFs = class _DeeplakeFs { throw fsErr("EISDIR", "illegal operation on a directory", p22); const text = typeof content === "string" ? content : Buffer.from(content).toString("utf-8"); const buf = Buffer.from(text, "utf-8"); - const mime = guessMime(basename4(p22)); + const mime = guessMime(basename5(p22)); this.files.set(p22, buf); this.meta.set(p22, { size: buf.length, mime, mtime: /* @__PURE__ */ new Date() }); this.addToTree(p22); this.pending.set(p22, { path: p22, - filename: basename4(p22), + filename: basename5(p22), contentText: text, mimeType: mime, sizeBytes: buf.length @@ -67910,7 +68302,7 @@ var DeeplakeFs = class _DeeplakeFs { // ── IFileSystem: metadata ───────────────────────────────────────────────── async exists(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return true; return this.files.has(p22) || this.dirs.has(p22); } @@ -67918,7 +68310,7 @@ var DeeplakeFs = class _DeeplakeFs { const p22 = normPath(path2); const isFile = this.files.has(p22); const isDir = this.dirs.has(p22); - if (p22 === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, @@ -67958,7 +68350,7 @@ var DeeplakeFs = class _DeeplakeFs { } async realpath(path2) { const p22 = normPath(path2); - if (p22 === "/index.md") + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/index.md") return 
p22; if (!this.files.has(p22) && !this.dirs.has(p22)) throw fsErr("ENOENT", "no such file or directory", p22); @@ -67983,14 +68375,14 @@ var DeeplakeFs = class _DeeplakeFs { const parent = parentOf(p22); if (!this.dirs.has(parent)) this.dirs.set(parent, /* @__PURE__ */ new Set()); - this.dirs.get(parent).add(basename4(p22)); + this.dirs.get(parent).add(basename5(p22)); } async readdir(path2) { const p22 = normPath(path2); if (!this.dirs.has(p22)) throw fsErr("ENOTDIR", "not a directory", p22); const entries = [...this.dirs.get(p22) ?? []]; - if (p22 === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p22 === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -68002,7 +68394,7 @@ var DeeplakeFs = class _DeeplakeFs { const child = p22 === "/" ? `/${name}` : `${p22}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || !this.sessionsOnly && !this.indexDisabled && child === "/index.md") && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false }; @@ -68038,7 +68430,7 @@ var DeeplakeFs = class _DeeplakeFs { for (const fp of safeToDelete) this.removeFromTree(fp); this.dirs.delete(p22); - this.dirs.get(parentOf(p22))?.delete(basename4(p22)); + this.dirs.get(parentOf(p22))?.delete(basename5(p22)); if (safeToDelete.length > 0) { const inList = safeToDelete.map((fp) => `'${sqlStr(fp)}'`).join(", "); await this.client.query(`DELETE FROM "${this.table}" WHERE path IN (${inList})`); @@ -69112,8 +69504,7 @@ function createGrepCommand(client, fs3, table, sessionsTable) { try { const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), - pathFilter: buildPathFilterForTargets(targets), - limit: 100 + pathFilter: buildPathFilterForTargets(targets) }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? 
"sessions", searchOptions), @@ -69136,7 +69527,8 @@ function createGrepCommand(client, fs3, table, sessionsTable) { } } const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : void 0; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? output.join("\n") + "\n" : "", stderr: "", diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index 4bf6ce0..11cad25 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -23,6 +23,11 @@ interface ParsedModifier { ignoreMissing: boolean; } +interface ParsedFindSpec { + patterns: string[]; + execGrepCmd: string | null; +} + function isQuoted(ch: string): boolean { return ch === "'" || ch === "\""; } @@ -31,11 +36,18 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu const parts: string[] = []; let current = ""; let quote: string | null = null; + let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; + if (escaped) { + current += ch; + escaped = false; + continue; + } if (quote) { if (ch === quote) quote = null; + else if (ch === "\\" && quote === "\"") escaped = true; current += ch; continue; } @@ -44,6 +56,11 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu current += ch; continue; } + if (ch === "\\" && i + 1 < input.length) { + current += ch; + escaped = true; + continue; + } const matched = operators.find((op) => input.startsWith(op, i)); if (matched) { @@ -57,7 +74,7 @@ export function splitTopLevel(input: string, operators: string[]): string[] | nu current += ch; } - if (quote) return null; + if (quote || escaped) return null; const trimmed = current.trim(); if (trimmed) parts.push(trimmed); return parts; 
@@ -127,10 +144,10 @@ export function expandBraceToken(token: string): string[] { } export function stripAllowedModifiers(segment: string): ParsedModifier { - const ignoreMissing = /\s2>\/dev\/null\s*$/.test(segment); + const ignoreMissing = /\s2>\/dev\/null(?=\s*(?:\||$))/.test(segment); const clean = segment - .replace(/\s2>\/dev\/null\s*$/g, "") - .replace(/\s2>&1\s*/g, " ") + .replace(/\s2>\/dev\/null(?=\s*(?:\||$))/g, "") + .replace(/\s2>&1(?=\s*(?:\||$))/g, "") .trim(); return { clean, ignoreMissing }; } @@ -192,7 +209,7 @@ function isValidPipelineHeadTailStage(stage: string): boolean { return false; } -function parseFindNamePatterns(tokens: string[]): string[] | null { +function parseFindSpec(tokens: string[]): ParsedFindSpec | null { const patterns: string[] = []; for (let i = 2; i < tokens.length; i++) { const token = tokens[i]; @@ -208,9 +225,20 @@ function parseFindNamePatterns(tokens: string[]): string[] | null { i += 1; continue; } + if (token === "-exec") { + const execTokens = tokens.slice(i + 1); + if (patterns.length === 0 || execTokens.length < 4) return null; + const terminator = execTokens.at(-1); + const target = execTokens.at(-2); + if ((terminator !== "\\;" && terminator !== ";") || target !== "{}") return null; + return { + patterns, + execGrepCmd: execTokens.slice(0, -1).join(" "), + }; + } return null; } - return patterns.length > 0 ? patterns : null; + return patterns.length > 0 ? 
{ patterns, execGrepCmd: null } : null; } export function parseCompiledSegment(segment: string): CompiledSegment | null { @@ -297,14 +325,29 @@ export function parseCompiledSegment(segment: string): CompiledSegment | null { if (pipeline.length > 3) return null; const dir = tokens[1]; if (!dir) return null; - const patterns = parseFindNamePatterns(tokens); - if (!patterns) return null; + const spec = parseFindSpec(tokens); + if (!spec) return null; + const { patterns, execGrepCmd } = spec; const countOnly = pipeline.length === 2 && /^wc\s+-l\s*$/.test(pipeline[1].trim()); if (countOnly) { if (patterns.length !== 1) return null; return { kind: "find", dir, pattern: patterns[0], countOnly }; } + if (execGrepCmd) { + const grepParams = parseBashGrep(execGrepCmd); + if (!grepParams) return null; + let lineLimit = 0; + if (pipeline.length === 2) { + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return null; + lineLimit = headTail.lineLimit; + } + return { kind: "find_grep", dir, patterns, params: grepParams, lineLimit }; + } + if (pipeline.length >= 2) { const xargsTokens = tokenizeShellWords(pipeline[1].trim()); if (!xargsTokens || xargsTokens[0] !== "xargs") return null; diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 3b1aacd..51cc5ee 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -28,6 +28,7 @@ import { readVirtualPathContents, listVirtualPathRows, readVirtualPathContent, + buildVirtualIndexContent, } from "../virtual-table-query.js"; import { readCachedIndexContent, @@ -36,6 +37,7 @@ import { import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; +import { isIndexDisabled, isSessionsOnlyMode } from 
"../../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; @@ -84,18 +86,6 @@ export function runVirtualShell(cmd: string, shellBundle = SHELL_BUNDLE, logFn: } } -function buildIndexContent(rows: Record[]): string { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"] as string; - const project = row["project"] as string || ""; - const description = (row["description"] as string || "").slice(0, 120); - const date = (row["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); - } - return lines.join("\n"); -} - interface CodexPreToolDeps { config?: ReturnType; createApi?: (table: string, config: NonNullable>) => DeeplakeApi; @@ -164,7 +154,7 @@ export async function processCodexPreToolUse( ): Promise> => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; @@ -248,17 +238,17 @@ export async function processCodexPreToolUse( if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" + let content = !isIndexDisabled() && virtualPath === "/index.md" ? 
readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` ); - content = buildIndexContent(idxRows); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index 77427bf..01c0538 100644 --- a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -11,6 +11,7 @@ import { grepBothTables, type GrepMatchParams } from "../shell/grep-core.js"; export interface GrepParams { pattern: string; targetPath: string; + recursive: boolean; ignoreCase: boolean; wordMatch: boolean; filesOnly: boolean; @@ -107,7 +108,7 @@ export function parseBashGrep(cmd: string): GrepParams | null { const tokens = tokenizeGrepStage(first); if (!tokens || tokens.length === 0) return null; - let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, + let recursive = false, ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; const explicitPatterns: string[] = []; @@ -165,6 +166,8 @@ export function parseBashGrep(cmd: string): GrepParams | null { case "F": fixedString = true; break; case "r": case "R": + recursive = true; + break; case "E": break; case "A": @@ -204,6 +207,7 @@ export function parseBashGrep(cmd: string): GrepParams | null { return { pattern, targetPath: target, + recursive, ignoreCase, wordMatch, 
filesOnly, countOnly, lineNumber, invertMatch, fixedString, }; } @@ -228,6 +232,13 @@ export async function handleGrepDirect( fixedString: params.fixedString, }; - const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath); + const output = await grepBothTables( + api, + table, + sessionsTable, + matchParams, + params.targetPath, + params.recursive ? true : undefined, + ); return output.join("\n") || "(no matches)"; } diff --git a/src/hooks/memory-path-utils.ts b/src/hooks/memory-path-utils.ts index b741cb3..0650b63 100644 --- a/src/hooks/memory-path-utils.ts +++ b/src/hooks/memory-path-utils.ts @@ -22,10 +22,69 @@ export const SAFE_BUILTINS = new Set([ "for", "while", "do", "done", "if", "then", "else", "fi", "case", "esac", ]); +function splitSafeStages(cmd: string): string[] | null { + const stages: string[] = []; + let current = ""; + let quote: string | null = null; + let escaped = false; + + for (let i = 0; i < cmd.length; i++) { + const ch = cmd[i]; + + if (escaped) { + current += ch; + escaped = false; + continue; + } + + if (quote) { + current += ch; + if (ch === quote) { + quote = null; + } else if (ch === "\\" && quote === "\"") { + escaped = true; + } + continue; + } + + if (ch === "\\" && i + 1 < cmd.length) { + current += ch; + escaped = true; + continue; + } + + if (ch === "'" || ch === "\"") { + quote = ch; + current += ch; + continue; + } + + const twoChar = cmd.slice(i, i + 2); + if (twoChar === "&&" || twoChar === "||") { + if (current.trim()) stages.push(current.trim()); + current = ""; + i += 1; + continue; + } + if (ch === "|" || ch === ";" || ch === "\n") { + if (current.trim()) stages.push(current.trim()); + current = ""; + continue; + } + + current += ch; + } + + if (quote || escaped) return null; + if (current.trim()) stages.push(current.trim()); + return stages; +} + export function isSafe(cmd: string): boolean { if (/\$\(|`|<\(/.test(cmd)) return false; const stripped = cmd.replace(/'[^']*'/g, 
"''").replace(/"[^"]*"/g, '""'); - const stages = stripped.split(/\||;|&&|\|\||\n/); + const stages = splitSafeStages(stripped); + if (!stages) return false; for (const stage of stages) { const firstToken = stage.trim().split(/\s+/)[0] ?? ""; if (firstToken && !SAFE_BUILTINS.has(firstToken)) return false; diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 2dc6498..3fbe58a 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -16,12 +16,14 @@ import { readVirtualPathContents, listVirtualPathRows, readVirtualPathContent, + buildVirtualIndexContent, } from "./virtual-table-query.js"; import { readCachedIndexContent, writeCachedIndexContent, } from "./query-cache.js"; import { isSafe, touchesMemory, rewritePaths } from "./memory-path-utils.js"; +import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; @@ -110,6 +112,7 @@ export function extractGrepParams( return { pattern: (toolInput.pattern as string) ?? "", targetPath: rewritePaths((toolInput.path as string) ?? "") || "/", + recursive: true, ignoreCase: !!toolInput["-i"], wordMatch: false, filesOnly: outputMode === "files_with_matches", @@ -196,7 +199,7 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT ): Promise> => { const uniquePaths = [...new Set(cachePaths)]; const result = new Map(uniquePaths.map((path) => [path, null])); - const cachedIndex = uniquePaths.includes("/index.md") + const cachedIndex = !isIndexDisabled() && uniquePaths.includes("/index.md") ? readCachedIndexContentFn(input.session_id) : null; @@ -283,26 +286,18 @@ export async function processPreToolUse(input: PreToolUseInput, deps: ClaudePreT if (virtualPath && !virtualPath.endsWith("/")) { logFn(`direct read: ${virtualPath}`); - let content = virtualPath === "/index.md" + let content = !isIndexDisabled() && virtualPath === "/index.md" ? 
readCachedIndexContentFn(input.session_id) : null; if (content === null) { content = await readVirtualPathContentFn(api, table, sessionsTable, virtualPath); } - if (content === null && virtualPath === "/index.md") { + if (content === null && virtualPath === "/index.md" && !isSessionsOnlyMode() && !isIndexDisabled()) { const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` ); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; - for (const r of idxRows) { - const p = r["path"] as string; - const proj = r["project"] as string || ""; - const desc = (r["description"] as string || "").slice(0, 120); - const date = (r["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? 
`[${proj}]` : ""} ${desc}`); - } - content = lines.join("\n"); + content = buildVirtualIndexContent(idxRows); } if (content !== null) { if (virtualPath === "/index.md") { diff --git a/src/hooks/virtual-table-query.ts b/src/hooks/virtual-table-query.ts index 34f0bf6..794f977 100644 --- a/src/hooks/virtual-table-query.ts +++ b/src/hooks/virtual-table-query.ts @@ -1,6 +1,8 @@ import type { DeeplakeApi } from "../deeplake-api.js"; import { sqlLike, sqlStr } from "../utils/sql.js"; import { normalizeContent } from "../shell/grep-core.js"; +import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { buildSummaryIndexEntry, buildSummaryIndexLine, type SummaryIndexEntry } from "../utils/summary-format.js"; type Row = Record; @@ -9,17 +11,109 @@ function normalizeSessionPart(path: string, content: string): string { } export function buildVirtualIndexContent(rows: Row[]): string { - const lines = ["# Memory Index", "", `${rows.length} sessions:`, ""]; - for (const row of rows) { - const path = row["path"] as string; - const project = row["project"] as string || ""; - const description = (row["description"] as string || "").slice(0, 120); - const date = (row["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${path}](${path}) ${date} ${project ? `[${project}]` : ""} ${description}`); + const entries = rows + .map((row) => buildSummaryIndexEntry(row)) + .filter((entry): entry is SummaryIndexEntry => entry !== null) + .sort((a, b) => (b.sortDate || "").localeCompare(a.sortDate || "") || a.path.localeCompare(b.path)); + + const lines = [ + "# Memory Index", + "", + "Persistent wiki directory. 
Start here, open the linked summary first, then open the paired raw session if you need exact wording or temporal grounding.", + "", + "## How To Use", + "", + "- Use the People section when the question names a person.", + "- In the catalog, each row links to both the summary page and its source session.", + "- Once you have a likely match, open that exact summary or session instead of broadening into wide grep scans.", + "", + ]; + + const peopleLines = buildPeopleDirectory(entries); + if (peopleLines.length > 0) { + lines.push("## People"); + lines.push(""); + lines.push(...peopleLines); + lines.push(""); + } + + const projectLines = buildProjectDirectory(entries); + if (projectLines.length > 0) { + lines.push("## Projects"); + lines.push(""); + lines.push(...projectLines); + lines.push(""); + } + + lines.push("## Summary To Session Catalog"); + lines.push(""); + for (const entry of entries) { + const line = buildSummaryIndexLine(entry); + if (line) lines.push(line); } return lines.join("\n"); } +function formatEntryLink(entry: SummaryIndexEntry): string { + const session = entry.source ? ` -> [session](${entry.source})` : ""; + return `[${entry.label}](${entry.path})${session}`; +} + +function topList(counts: Map, limit: number): string[] { + return [...counts.entries()] + .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])) + .slice(0, limit) + .map(([value]) => value); +} + +function buildPeopleDirectory(entries: SummaryIndexEntry[]): string[] { + const people = new Map; recent: SummaryIndexEntry[] }>(); + + for (const entry of entries) { + for (const person of entry.participants) { + const current = people.get(person) ?? { count: 0, topics: new Map(), recent: [] }; + current.count += 1; + for (const topic of entry.topics) { + current.topics.set(topic, (current.topics.get(topic) ?? 
0) + 1); + } + current.recent.push(entry); + people.set(person, current); + } + } + + return [...people.entries()] + .sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])) + .map(([person, info]) => { + const topics = topList(info.topics, 3); + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${person} — ${info.count} summaries`]; + if (topics.length > 0) parts.push(`topics: ${topics.join("; ")}`); + if (recent) parts.push(`recent: ${recent}`); + return parts.join(" — "); + }); +} + +function buildProjectDirectory(entries: SummaryIndexEntry[]): string[] { + const projects = new Map(); + + for (const entry of entries) { + if (!entry.project) continue; + const current = projects.get(entry.project) ?? { count: 0, recent: [] }; + current.count += 1; + current.recent.push(entry); + projects.set(entry.project, current); + } + + return [...projects.entries()] + .sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0])) + .map(([project, info]) => { + const recent = info.recent.slice(0, 2).map((entry) => formatEntryLink(entry)).join(", "); + const parts = [`- ${project} — ${info.count} summaries`]; + if (recent) parts.push(`recent: ${recent}`); + return parts.join(" — "); + }); +} + function buildUnionQuery(memoryQuery: string, sessionsQuery: string): string { return ( `SELECT path, content, size_bytes, creation_date, source_order FROM (` + @@ -44,6 +138,12 @@ async function queryUnionRows( memoryQuery: string, sessionsQuery: string, ): Promise { + if (isSessionsOnlyMode()) { + return api.query( + `SELECT path, content, size_bytes, creation_date, source_order FROM (${sessionsQuery}) AS combined ORDER BY path, source_order, creation_date` + ); + } + const unionQuery = buildUnionQuery(memoryQuery, sessionsQuery); try { return await api.query(unionQuery); @@ -65,8 +165,16 @@ export async function readVirtualPathContents( const uniquePaths = [...new Set(virtualPaths)]; const result = new 
Map(uniquePaths.map(path => [path, null])); if (uniquePaths.length === 0) return result; + if (isIndexDisabled() && uniquePaths.includes("/index.md")) { + result.set("/index.md", null); + } + + const queryPaths = isIndexDisabled() + ? uniquePaths.filter((path) => path !== "/index.md") + : uniquePaths; + if (queryPaths.length === 0) return result; - const inList = buildInList(uniquePaths); + const inList = buildInList(queryPaths); const rows = await queryUnionRows( api, `SELECT path, summary::text AS content, NULL::bigint AS size_bytes, '' AS creation_date, 0 AS source_order FROM "${memoryTable}" WHERE path IN (${inList})`, @@ -89,7 +197,7 @@ export async function readVirtualPathContents( } } - for (const path of uniquePaths) { + for (const path of queryPaths) { if (memoryHits.has(path)) { result.set(path, memoryHits.get(path) ?? null); continue; @@ -100,9 +208,9 @@ export async function readVirtualPathContents( } } - if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) { + if (!isSessionsOnlyMode() && !isIndexDisabled() && result.get("/index.md") === null && uniquePaths.includes("/index.md")) { const rows = await api.query( - `SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC, creation_date DESC` ).catch(() => []); result.set("/index.md", buildVirtualIndexContent(rows)); } diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index 8db0716..a9e450a 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -6,6 +6,8 @@ import type { FileContent, BufferEncoding, } from "just-bash"; import { normalizeContent } from "./grep-core.js"; +import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { buildVirtualIndexContent } from 
"../hooks/virtual-table-query.js"; interface ReadFileOptions { encoding?: BufferEncoding } interface WriteFileOptions { encoding?: BufferEncoding } @@ -84,6 +86,8 @@ export class DeeplakeFs implements IFileSystem { // Paths that live in the sessions table (multi-row, read by concatenation) private sessionPaths = new Set(); private sessionsTable: string | null = null; + private sessionsOnly = false; + private indexDisabled = false; private constructor( private readonly client: DeeplakeApi, @@ -102,12 +106,14 @@ export class DeeplakeFs implements IFileSystem { ): Promise { const fs = new DeeplakeFs(client, table, mount); fs.sessionsTable = sessionsTable ?? null; + fs.sessionsOnly = isSessionsOnlyMode(); + fs.indexDisabled = isIndexDisabled(); // Ensure the table exists before bootstrapping. await client.ensureTable(); // Bootstrap memory + sessions metadata in parallel. let sessionSyncOk = true; - const memoryBootstrap = (async () => { + const memoryBootstrap = fs.sessionsOnly ? Promise.resolve() : (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { const rows = await client.query(sql); @@ -247,55 +253,10 @@ export class DeeplakeFs implements IFileSystem { private async generateVirtualIndex(): Promise { const rows = await this.client.query( - `SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" ` + - `WHERE path LIKE '${esc("/summaries/")}%' ORDER BY last_update_date DESC` + `SELECT path, project, description, summary, creation_date, last_update_date FROM "${this.table}" ` + + `WHERE path LIKE '${esc("/summaries/")}%' ORDER BY last_update_date DESC, creation_date DESC` ); - - // Build a lookup: key → session path from sessionPaths - // Supports two formats: - // 1. /sessions//___.jsonl → key = sessionId - // 2. 
/sessions//.json or .jsonl → key = filename stem - const sessionPathsByKey = new Map(); - for (const sp of this.sessionPaths) { - const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (hivemind) { - sessionPathsByKey.set(hivemind[1], sp.slice(1)); - } else { - // Generic: extract filename without extension - const fname = sp.split("/").pop() ?? ""; - const stem = fname.replace(/\.[^.]+$/, ""); - if (stem) sessionPathsByKey.set(stem, sp.slice(1)); - } - } - - const lines: string[] = [ - "# Session Index", - "", - "List of all Claude Code sessions with summaries.", - "", - "| Session | Conversation | Created | Last Updated | Project | Description |", - "|---------|-------------|---------|--------------|---------|-------------|", - ]; - for (const row of rows) { - const p = row["path"] as string; - // Extract session ID from path: /summaries//.md - const match = p.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/); - if (!match) continue; - const summaryUser = match[1]; - const sessionId = match[2]; - const relPath = `summaries/${summaryUser}/${sessionId}.md`; - // Try matching session: first exact sessionId, then strip _summary suffix - const baseName = sessionId.replace(/_summary$/, ""); - const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); - const convLink = convPath ? 
`[messages](${convPath})` : ""; - const project = (row["project"] as string) || ""; - const description = (row["description"] as string) || ""; - const creationDate = (row["creation_date"] as string) || ""; - const lastUpdateDate = (row["last_update_date"] as string) || ""; - lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`); - } - lines.push(""); - return lines.join("\n"); + return buildVirtualIndexContent(rows); } // ── batch prefetch ──────────────────────────────────────────────────────── @@ -396,7 +357,7 @@ export class DeeplakeFs implements IFileSystem { if (this.dirs.has(p) && !this.files.has(p)) throw fsErr("EISDIR", "illegal operation on a directory", p); // Virtual index.md: if no real row exists, generate from summary rows - if (p === "/index.md" && !this.files.has(p)) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md" && !this.files.has(p)) { // Check if a real /index.md row exists in the table const realRows = await this.client.query( `SELECT summary FROM "${this.table}" WHERE path = '${esc("/index.md")}' LIMIT 1` @@ -526,7 +487,7 @@ export class DeeplakeFs implements IFileSystem { async exists(path: string): Promise { const p = normPath(path); - if (p === "/index.md") return true; // Virtual index always exists + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md") return true; // Virtual index always exists return this.files.has(p) || this.dirs.has(p); } @@ -535,7 +496,7 @@ export class DeeplakeFs implements IFileSystem { const isFile = this.files.has(p); const isDir = this.dirs.has(p); // Virtual index.md: always exists as a file - if (p === "/index.md" && !isFile && !isDir) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md" && !isFile && !isDir) { return { isFile: true, isDirectory: false, isSymbolicLink: false, mode: 0o644, size: 0, mtime: new Date(), @@ -562,7 +523,7 @@ export class DeeplakeFs implements IFileSystem 
{ async readlink(path: string): Promise { throw fsErr("EINVAL", "invalid argument", path); } async realpath(path: string): Promise { const p = normPath(path); - if (p === "/index.md") return p; // Virtual index always exists + if (!this.sessionsOnly && !this.indexDisabled && p === "/index.md") return p; // Virtual index always exists if (!this.files.has(p) && !this.dirs.has(p)) throw fsErr("ENOENT", "no such file or directory", p); return p; } @@ -591,7 +552,7 @@ export class DeeplakeFs implements IFileSystem { if (!this.dirs.has(p)) throw fsErr("ENOTDIR", "not a directory", p); const entries = [...(this.dirs.get(p) ?? [])]; // Virtual index.md: always show in root listing even if no real row exists - if (p === "/" && !entries.includes("index.md")) { + if (!this.sessionsOnly && !this.indexDisabled && p === "/" && !entries.includes("index.md")) { entries.push("index.md"); } return entries; @@ -604,7 +565,7 @@ export class DeeplakeFs implements IFileSystem { const child = p === "/" ? `/${name}` : `${p}/${name}`; return { name, - isFile: (this.files.has(child) || child === "/index.md") && !this.dirs.has(child), + isFile: (this.files.has(child) || (!this.sessionsOnly && !this.indexDisabled && child === "/index.md")) && !this.dirs.has(child), isDirectory: this.dirs.has(child), isSymbolicLink: false, }; diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index abad499..8e2cae4 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -7,15 +7,23 @@ * 1. searchDeeplakeTables: run one UNION ALL query across both the memory * table (summaries, column `summary`) AND the sessions table * (raw dialogue, column `message` JSONB), returning {path, content}. - * 2. normalizeSessionContent: when a row comes from a session path, turn the - * single-line JSON blob into multi-line "Speaker: text" so the standard - * line-wise regex refinement surfaces only matching turns, not the whole - * 5 KB blob. + * 2. 
normalizeSessionContent: when a row comes from a session path, expose a + * file-like text view. Transcript JSON blobs stay as canonical pretty + * JSON so local grep/read over `/sessions/*.json` matches the plugin + * surface, while production hook-event rows keep their concise normalized + * text view. * 3. refineGrepMatches: line-by-line regex match with the usual grep flags. */ import type { DeeplakeApi } from "../deeplake-api.js"; import { sqlStr, sqlLike } from "../utils/sql.js"; +import { isSessionsOnlyMode, isSummaryBm25Disabled } from "../utils/retrieval-mode.js"; + +const DEFAULT_GREP_CANDIDATE_LIMIT = Number( + process.env["HIVEMIND_GREP_LIMIT"] + ?? process.env["DEEPLAKE_GREP_LIMIT"] + ?? 500, +); // ── Types ──────────────────────────────────────────────────────────────────── @@ -44,20 +52,42 @@ export interface SearchOptions { likeOp: "LIKE" | "ILIKE"; /** LIKE-escaped pattern (via sqlLike). */ escapedPattern: string; + /** Optional raw grep regex pattern. May be normalized before SQL pushdown. */ + regexPattern?: string; /** Optional safe literal anchor for regex searches (e.g. foo.*bar → foo). */ prefilterPattern?: string; /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ prefilterPatterns?: string[]; + /** Optional lexical query text for BM25 summary retrieval. */ + bm25QueryText?: string; /** Per-table row cap. */ limit?: number; } +function escapeRegexLiteral(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Normalize common grep BRE operator spellings into the JS/SQL-regex form used + * by our execution paths. This fixes patterns like `book\\|novel` that grep + * users often write for alternation. 
+ */ +export function normalizeGrepRegexPattern(pattern: string): string { + return pattern + .replace(/\\([|(){}+?])/g, "$1") + .replace(/\\/g, "\\b"); +} + // ── Content normalization ─────────────────────────────────────────────────── /** - * If the row is a session JSON blob, serialize it as multi-line - * "Speaker: text" so the standard grep refinement surfaces only matching turns. - * Falls back to the raw content if parsing fails or the path is not a session. + * If the row is a session JSON blob, expose a file-like text view. Transcript + * blobs (`turns` / `dialogue`) stay as canonical pretty JSON so grep/read + * match the local filesystem surface. Hook-event rows keep a concise + * normalized text projection. Falls back to the raw content if parsing fails + * or the path is not a session. */ // ── Tool-call extractor ───────────────────────────────────────────────────── // Extracts only signal-bearing fields from `tool_input` / `tool_response`, @@ -173,23 +203,9 @@ export function normalizeContent(path: string, raw: string): string { let obj: any; try { obj = JSON.parse(raw); } catch { return raw; } - // ── Turn-array session shape: { turns: [...] } ─────────────────────────── - if (Array.isArray(obj.turns)) { - const header: string[] = []; - if (obj.date_time) header.push(`date: ${obj.date_time}`); - if (obj.speakers) { - const s = obj.speakers; - const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", "); - if (names) header.push(`speakers: ${names}`); - } - const lines = obj.turns.map((t: any) => { - const sp = String(t?.speaker ?? t?.name ?? "?").trim(); - const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim(); - const tag = t?.dia_id ? `[${t.dia_id}] ` : ""; - return `${tag}${sp}: ${tx}`; - }); - const out = [...header, ...lines].join("\n"); - return out.trim() ? 
out : raw; + // ── Transcript session shapes: keep a canonical raw-JSON view ─────────── + if (Array.isArray(obj.turns) || Array.isArray(obj.dialogue)) { + return `${JSON.stringify(obj, null, 2)}\n`; } // ── Production shape: single hook-event row (capture.ts output) ───────── @@ -254,22 +270,52 @@ export async function searchDeeplakeTables( sessionsTable: string, opts: SearchOptions, ): Promise { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns } = opts; - const limit = opts.limit ?? 100; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? [prefilterPattern] : [])) : [escapedPattern]; - const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns); - const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const ignoreCase = likeOp === "ILIKE"; + const likeMemFilter = buildContentFilter("summary::text", likeOp, filterPatterns); + const likeSessFilter = buildContentFilter("message::text", likeOp, filterPatterns); + const regexMemFilter = regexPattern ? buildRegexFilter("summary::text", regexPattern, ignoreCase) : ""; + const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; + // Stay on portable message::text filters for session rows. The structured + // json_extract_string() predicates currently fail against the managed + // backend for these JSONB rows, which forces a 400 and a retry onto a + // coarser query path. 
+ const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; + const fallbackSessFilter = likeSessFilter; + const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); + const sessionsOnly = isSessionsOnlyMode(); + const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const shouldUseFallbackCapablePrimary = useSummaryBm25 + || hasSqlRegexFilter; + const ensureSummaryBm25Index = (api as DeeplakeApi & { + ensureSummaryBm25Index?: (tableName?: string) => Promise; + }).ensureSummaryBm25Index; + + if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + await ensureSummaryBm25Index.call(api, memoryTable).catch(() => {}); + } + + const buildCombinedQuery = (memFilter: string, sessFilter: string, useBm25Summary = false): string => { + const memQuery = useBm25Summary + ? buildSummaryBm25Query(memoryTable, pathFilter, bm25QueryText ?? "", limit) + : `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; + const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + return sessionsOnly + ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` + : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; - const memQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter}${memFilter} LIMIT ${limit}`; - const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; + const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); + const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = await api.query( - `SELECT path, content, source_order, creation_date FROM (` + - `(${memQuery}) UNION ALL (${sessQuery})` + - `) AS combined ORDER BY path, source_order, creation_date` - ); + const rows = shouldUseFallbackCapablePrimary + ? 
await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) + : await api.query(primaryQuery); return rows.map(row => ({ path: String(row["path"]), @@ -311,6 +357,10 @@ export function extractRegexLiteralPrefilter(pattern: string): string | null { if (ch === "\\") { const next = pattern[i + 1]; if (!next) return null; + if (/[bByYmM<>]/.test(next)) { + i++; + continue; + } if (/[dDsSwWbBAZzGkKpP]/.test(next)) return null; current += next; i++; @@ -335,14 +385,15 @@ export function extractRegexLiteralPrefilter(pattern: string): string | null { } export function extractRegexAlternationPrefilters(pattern: string): string[] | null { - if (!pattern.includes("|")) return null; + const unwrapped = unwrapWholeRegexGroup(pattern); + if (!unwrapped.includes("|")) return null; const parts: string[] = []; let current = ""; let escaped = false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; + for (let i = 0; i < unwrapped.length; i++) { + const ch = unwrapped[i]; if (escaped) { current += `\\${ch}`; escaped = false; @@ -374,42 +425,291 @@ export function extractRegexAlternationPrefilters(pattern: string): string[] | n } export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: string): SearchOptions { - const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(params.pattern); - const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(params.pattern) : null; - const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(params.pattern) : null; + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); + const hasRegexMeta = !params.fixedString && /[.*+?^${}()|[\]\\]/.test(normalizedPattern); + const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; + const alternationPrefilters = hasRegexMeta ? 
extractRegexAlternationPrefilters(normalizedPattern) : null; + const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; + const sqlRegexPattern = params.wordMatch + ? `\\b(?:${regexBase})\\b` + : hasRegexMeta + ? regexBase + : undefined; return { pathFilter: buildPathFilter(targetPath), contentScanOnly: hasRegexMeta, likeOp: params.ignoreCase ? "ILIKE" : "LIKE", escapedPattern: sqlLike(params.pattern), + regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : undefined, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + bm25QueryText: bm25QueryText ?? undefined, + limit: DEFAULT_GREP_CANDIDATE_LIMIT, }; } +export function buildSummaryBm25QueryText( + pattern: string, + fixedString: boolean, + literalPrefilter: string | null, + alternationPrefilters: string[] | null, +): string | null { + const rawTokens = alternationPrefilters && alternationPrefilters.length > 0 + ? alternationPrefilters + : literalPrefilter + ? [literalPrefilter] + : [pattern]; + + const cleaned = [...new Set( + rawTokens + .flatMap((token) => token + .replace(/\\b/g, " ") + .replace(/[.*+?^${}()[\]{}|\\]/g, " ") + .split(/\s+/)) + .map((token) => token.trim()) + .filter((token) => token.length >= 2), + )]; + + if (cleaned.length === 0) { + return fixedString && pattern.trim().length >= 2 ? pattern.trim() : null; + } + return cleaned.join(" "); +} + function buildContentFilter( column: string, likeOp: "LIKE" | "ILIKE", patterns: string[], +): string { + const predicate = buildContentPredicate(column, likeOp, patterns); + return predicate ? ` AND ${predicate}` : ""; +} + +function buildRegexFilter( + column: string, + pattern: string, + ignoreCase: boolean, +): string { + const predicate = buildRegexPredicate(column, pattern, ignoreCase); + return predicate ? 
` AND ${predicate}` : ""; +} + +function buildSummaryBm25Query( + memoryTable: string, + pathFilter: string, + queryText: string, + limit: number, +): string { + return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; +} + +export function toSqlRegexPattern( + pattern: string, + ignoreCase: boolean, +): string | null { + if (!pattern) return null; + + // Deeplake SQL supports `~` but not `~*`. For ignore-case regex searches, + // rely on LIKE/ILIKE prefilters plus in-memory regex refinement instead of + // pushing an incompatible SQL operator. + if (ignoreCase) return null; + + try { + new RegExp(pattern); + return translateRegexPatternToSql(pattern); + } catch { + return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } +} + +function isSqlRegexPushdownSafe(pattern: string): boolean { + // The managed backend rejects some otherwise valid JS regexes, especially + // patterns with bracket syntax, anchors, or escaped literals like `^\[`. + // Keep SQL regex pushdown to a conservative subset and rely on in-memory + // refinement after candidate fetch for everything else. 
+ return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); +} + +function unwrapWholeRegexGroup(pattern: string): string { + if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; + + let depth = 0; + let escaped = false; + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + if (escaped) { + escaped = false; + continue; + } + if (ch === "\\") { + escaped = true; + continue; + } + if (ch === "(") depth++; + if (ch === ")") { + depth--; + if (depth === 0 && i !== pattern.length - 1) return pattern; + } + } + if (depth !== 0) return pattern; + if (pattern.startsWith("(?:")) return pattern.slice(3, -1); + return pattern.slice(1, -1); +} + +function translateRegexPatternToSql(pattern: string): string | null { + let out = ""; + + for (let i = 0; i < pattern.length; i++) { + const ch = pattern[i]; + + if (ch === "\\") { + const next = pattern[i + 1]; + if (!next) return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + i++; + switch (next) { + case "d": out += "[[:digit:]]"; continue; + case "D": out += "[^[:digit:]]"; continue; + case "s": out += "[[:space:]]"; continue; + case "S": out += "[^[:space:]]"; continue; + case "w": out += "[[:alnum:]_]"; continue; + case "W": out += "[^[:alnum:]_]"; continue; + case "b": out += "\\y"; continue; + case "A": + case "B": + case "G": + case "K": + case "P": + case "p": + case "z": + return null; + default: + out += `\\${next}`; + continue; + } + } + + if (ch === "(" && pattern.startsWith("(?:", i)) { + out += "("; + i += 2; + continue; + } + + if (ch === "(" && /^[(]\?<[^>]+>/.test(pattern.slice(i))) { + const named = pattern.slice(i).match(/^\(\?<[^>]+>/); + if (!named) return null; + out += "("; + i += named[0].length - 1; + continue; + } + + if (ch === "(" && pattern[i + 1] === "?") return null; + + out += ch; + } + + return out; +} + +function buildContentPredicate( + column: string, + likeOp: "LIKE" | "ILIKE", + patterns: string[], ): string { if (patterns.length === 0) return ""; - if 
(patterns.length === 1) return ` AND ${column} ${likeOp} '%${patterns[0]}%'`; - return ` AND (${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; + if (patterns.length === 1) return `${column} ${likeOp} '%${patterns[0]}%'`; + return `(${patterns.map((pattern) => `${column} ${likeOp} '%${pattern}%'`).join(" OR ")})`; +} + +function buildRegexPredicate( + column: string, + pattern: string | undefined, + ignoreCase: boolean, +): string { + if (!pattern) return ""; + if (!isSqlRegexPushdownSafe(pattern)) return ""; + const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); + if (!sqlPattern) return ""; + return `${column} ~ '${sqlStr(sqlPattern)}'`; +} + +function joinAndPredicates(predicates: string[]): string { + const filtered = predicates.filter(Boolean); + if (filtered.length === 0) return ""; + if (filtered.length === 1) return filtered[0]!; + return `(${filtered.join(" AND ")})`; +} + +function joinOrPredicates(predicates: string[]): string { + const filtered = predicates.filter(Boolean); + if (filtered.length === 0) return ""; + if (filtered.length === 1) return filtered[0]!; + return `(${filtered.join(" OR ")})`; +} + +function buildAnyColumnPredicate( + columns: string[], + builder: (column: string) => string, +): string { + return joinOrPredicates(columns.map((column) => builder(column))); +} + +function buildStructuredSessionFilter( + likeOp: "LIKE" | "ILIKE", + patterns: string[], + regexPattern: string | undefined, + ignoreCase: boolean, +): string { + const typeExpr = "COALESCE(json_extract_string(message, '$.type'), '')"; + const contentExpr = "COALESCE(json_extract_string(message, '$.content'), '')"; + const toolFieldExprs = [ + "COALESCE(json_extract_string(message, '$.tool_name'), '')", + "COALESCE(json_extract_string(message, '$.tool_input'), '')", + "COALESCE(json_extract_string(message, '$.tool_response'), '')", + ]; + const metaExprs = [ + typeExpr, + "COALESCE(json_extract_string(message, '$.hook_event_name'), 
'')", + "COALESCE(json_extract_string(message, '$.agent_type'), '')", + ]; + + const buildFieldSearch = (columns: string[]): string => joinAndPredicates([ + buildAnyColumnPredicate(columns, (column) => buildContentPredicate(column, likeOp, patterns)), + buildAnyColumnPredicate(columns, (column) => buildRegexPredicate(column, regexPattern, ignoreCase)), + ]); + + const contentSearch = buildFieldSearch([contentExpr]); + const toolSearch = buildFieldSearch(toolFieldExprs); + const metaSearch = buildFieldSearch(metaExprs); + + const branches = [ + contentSearch + ? joinAndPredicates([`${typeExpr} IN ('user_message', 'assistant_message')`, contentSearch]) + : "", + toolSearch + ? joinAndPredicates([`${typeExpr} = 'tool_call'`, toolSearch]) + : "", + metaSearch, + ]; + + const predicate = joinOrPredicates(branches); + return predicate ? ` AND ${predicate}` : ""; } // ── Regex refinement (line-by-line grep) ──────────────────────────────────── /** Compile the grep regex from params, with a safe fallback on bad user regex. */ export function compileGrepRegex(params: GrepMatchParams): RegExp { + const normalizedPattern = params.fixedString ? params.pattern : normalizeGrepRegexPattern(params.pattern); let reStr = params.fixedString - ? params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") - : params.pattern; - if (params.wordMatch) reStr = `\\b${reStr}\\b`; + ? escapeRegexLiteral(normalizedPattern) + : normalizedPattern; + if (params.wordMatch) reStr = `\\b(?:${reStr})\\b`; try { return new RegExp(reStr, params.ignoreCase ? "i" : ""); } catch { return new RegExp( - params.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), + escapeRegexLiteral(normalizedPattern), params.ignoreCase ? 
"i" : "", ); } @@ -462,6 +762,7 @@ export async function grepBothTables( sessionsTable: string, params: GrepMatchParams, targetPath: string, + forceMultiFilePrefix?: boolean, ): Promise { const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath)); // Defensive path dedup — memory and sessions tables use disjoint path @@ -472,5 +773,5 @@ export async function grepBothTables( const seen = new Set(); const unique = rows.filter(r => seen.has(r.path) ? false : (seen.add(r.path), true)); const normalized = unique.map(r => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - return refineGrepMatches(normalized, params); + return refineGrepMatches(normalized, params, forceMultiFilePrefix); } diff --git a/src/shell/grep-interceptor.ts b/src/shell/grep-interceptor.ts index debd0cd..398f4c2 100644 --- a/src/shell/grep-interceptor.ts +++ b/src/shell/grep-interceptor.ts @@ -18,7 +18,7 @@ const MAX_FALLBACK_CANDIDATES = 500; /** * grep implementation for the deeplake-shell (virtual bash). Two paths: * 1. SQL-first: dual-table LIKE/ILIKE search via grep-core, with session - * JSON normalized to per-turn lines for sane output. + * content projected into the same file-like view used by local reads. * 2. Fallback: if SQL returns nothing (or races past a 3s timeout), scan * the in-memory FS cache using the same regex refinement. * @@ -76,7 +76,6 @@ export function createGrepCommand( const searchOptions = { ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd), pathFilter: buildPathFilterForTargets(targets), - limit: 100, }; const queryRows = await Promise.race([ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions), @@ -106,9 +105,10 @@ export function createGrepCommand( } } - // Normalize session JSON blobs to per-turn lines before the regex pass. + // Normalize session blobs into the same file-like text view used by reads. 
const normalized = rows.map(r => ({ path: r.path, content: normalizeContent(r.path, r.content) })); - const output = refineGrepMatches(normalized, matchParams); + const forceMultiFilePrefix = parsed.r || parsed.R || parsed.recursive ? true : undefined; + const output = refineGrepMatches(normalized, matchParams, forceMultiFilePrefix); return { stdout: output.length > 0 ? output.join("\n") + "\n" : "", diff --git a/src/utils/retrieval-mode.ts b/src/utils/retrieval-mode.ts new file mode 100644 index 0000000..3433a07 --- /dev/null +++ b/src/utils/retrieval-mode.ts @@ -0,0 +1,14 @@ +export function isSessionsOnlyMode(): boolean { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + +export function isIndexDisabled(): boolean { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + +export function isSummaryBm25Disabled(): boolean { + const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} diff --git a/src/utils/summary-format.ts b/src/utils/summary-format.ts new file mode 100644 index 0000000..04254de --- /dev/null +++ b/src/utils/summary-format.ts @@ -0,0 +1,184 @@ +function escapeRegex(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function basename(path: string): string { + const trimmed = path.replace(/\/+$/, ""); + const idx = trimmed.lastIndexOf("/"); + return idx === -1 ? trimmed : trimmed.slice(idx + 1); +} + +export function extractSection(text: string, heading: string): string | null { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? 
match[1].trim() : null; +} + +export function extractHeaderField(text: string, field: string): string | null { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} + +function compactText(value: string): string { + return value.replace(/\s+/g, " ").trim(); +} + +function splitMetadataList(value: string | null): string[] { + if (!value) return []; + return [...new Set( + value + .split(/\s*(?:,|;|&|\band\b)\s*/i) + .map((part) => compactText(part)) + .filter((part) => part.length >= 2 && !/^unknown$/i.test(part)), + )]; +} + +function extractBullets(section: string | null, limit = 3): string[] { + if (!section) return []; + return section + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.startsWith("- ")) + .map((line) => compactText(line.slice(2))) + .filter(Boolean) + .slice(0, limit); +} + +export function extractSummaryDate(text: string): string | null { + return extractHeaderField(text, "Date") + ?? extractHeaderField(text, "Started"); +} + +export function extractSummaryParticipants(text: string): string | null { + return extractHeaderField(text, "Participants") + ?? extractHeaderField(text, "Speakers"); +} + +export function extractSummaryTopics(text: string): string | null { + return extractHeaderField(text, "Topics"); +} + +export function extractSummarySource(text: string): string | null { + return extractHeaderField(text, "Source"); +} + +export function buildSummaryBlurb(text: string): string { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? 
""); + + const parts: string[] = []; + if (participants) parts.push(participants); + if (topics) parts.push(topics); + if (keyBullets.length > 0) parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) parts.push(whatHappened); + + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} + +function truncate(value: string, max: number): string { + return value.length > max ? `${value.slice(0, max - 1).trimEnd()}…` : value; +} + +function formatIndexTimestamp(value: string): string { + if (!value) return ""; + if (!/^\d{4}-\d{2}-\d{2}T/.test(value)) return value; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) return value; + const ts = new Date(parsed); + const yyyy = ts.getUTCFullYear(); + const mm = String(ts.getUTCMonth() + 1).padStart(2, "0"); + const dd = String(ts.getUTCDate()).padStart(2, "0"); + const hh = String(ts.getUTCHours()).padStart(2, "0"); + const min = String(ts.getUTCMinutes()).padStart(2, "0"); + return `${yyyy}-${mm}-${dd} ${hh}:${min} UTC`; +} + +export interface SummaryIndexRow { + path?: unknown; + project?: unknown; + description?: unknown; + summary?: unknown; + creation_date?: unknown; + last_update_date?: unknown; +} + +export interface SummaryIndexEntry { + path: string; + label: string; + project: string; + description: string; + date: string; + createdAt: string; + updatedAt: string; + sortDate: string; + participantsText: string; + participants: string[]; + topicsText: string; + topics: string[]; + source: string; + blurb: string; +} + +export function buildSummaryIndexEntry(row: SummaryIndexRow): SummaryIndexEntry | null { + const path = typeof row.path === "string" ? row.path : ""; + if (!path) return null; + if (path.startsWith("/summaries/") && !/^\/summaries\/[^/]+\/[^/]+$/.test(path)) return null; + + const summary = typeof row.summary === "string" ? row.summary : ""; + const project = typeof row.project === "string" ? 
row.project.trim() : ""; + const description = typeof row.description === "string" ? compactText(row.description) : ""; + const creationDate = typeof row.creation_date === "string" ? row.creation_date : ""; + const lastUpdateDate = typeof row.last_update_date === "string" ? row.last_update_date : ""; + + const label = basename(path) || path; + const date = summary ? extractSummaryDate(summary) ?? creationDate : creationDate; + const participantsText = summary ? extractSummaryParticipants(summary) ?? "" : ""; + const topicsText = summary ? extractSummaryTopics(summary) ?? "" : ""; + const source = summary ? extractSummarySource(summary) ?? "" : ""; + const structuredBlurb = summary ? buildSummaryBlurb(summary) : ""; + const blurb = structuredBlurb && structuredBlurb !== "completed" + ? structuredBlurb + : truncate(description, 220); + + return { + path, + label, + project, + description, + date, + createdAt: creationDate, + updatedAt: lastUpdateDate, + sortDate: lastUpdateDate || creationDate || date, + participantsText, + participants: splitMetadataList(participantsText), + topicsText, + topics: splitMetadataList(topicsText), + source, + blurb, + }; +} + +export function formatSummaryIndexEntry(entry: SummaryIndexEntry): string { + const parts = [`- [summary: ${entry.label}](${entry.path})`]; + if (entry.source) parts.push(`[session](${entry.source})`); + if (entry.date) parts.push(truncate(entry.date, 40)); + const visibleTime = entry.updatedAt || entry.createdAt; + if (visibleTime) parts.push(`updated: ${truncate(formatIndexTimestamp(visibleTime), 24)}`); + if (entry.participantsText) parts.push(truncate(entry.participantsText, 80)); + if (entry.topicsText) parts.push(`topics: ${truncate(entry.topicsText, 90)}`); + if (entry.project) parts.push(`[${truncate(entry.project, 40)}]`); + if (entry.blurb && entry.blurb !== "completed") parts.push(truncate(entry.blurb, 220)); + return parts.join(" — "); +} + +export function buildSummaryIndexLine(row: SummaryIndexRow | 
SummaryIndexEntry): string | null { + const entry = "label" in row && typeof row.label === "string" + ? row + : buildSummaryIndexEntry(row); + return entry ? formatSummaryIndexEntry(entry) : null; +} From 01bd4a163c848bcc1d2c4a1376aaf1cde94a2fb6 Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 15:19:44 -0700 Subject: [PATCH 2/7] Add SQL transcript surface for Deeplake recall --- claude-code/bundle/capture.js | 64 ++- claude-code/bundle/commands/auth-login.js | 23 + claude-code/bundle/pre-tool-use.js | 200 ++++++++- claude-code/bundle/session-end.js | 61 ++- claude-code/bundle/session-start-setup.js | 23 + claude-code/bundle/session-start.js | 144 ++++++- claude-code/bundle/wiki-worker.js | 52 ++- .../tests/bash-command-compiler.test.ts | 123 ++++++ claude-code/tests/deeplake-api.test.ts | 23 + claude-code/tests/deeplake-fs.test.ts | 88 +++- claude-code/tests/hooks-source.test.ts | 121 ++++++ claude-code/tests/query-cache.test.ts | 13 + claude-code/tests/session-start.test.ts | 9 + claude-code/tests/session-summary.test.ts | 9 +- claude-code/tests/summary-format.test.ts | 69 +++ claude-code/tests/upload-summary.test.ts | 26 +- codex/bundle/capture.js | 39 +- codex/bundle/commands/auth-login.js | 23 + codex/bundle/pre-tool-use.js | 218 +++++++++- codex/bundle/session-start-setup.js | 23 + codex/bundle/session-start.js | 100 ++++- codex/bundle/stop.js | 59 ++- codex/bundle/wiki-worker.js | 52 ++- codex/tests/codex-integration.test.ts | 14 + src/deeplake-api.ts | 24 ++ src/hooks/bash-command-compiler.ts | 183 ++++++++ src/hooks/codex/pre-tool-use.ts | 86 +++- src/hooks/codex/session-start.ts | 93 +++- src/hooks/codex/spawn-wiki-worker.ts | 38 +- src/hooks/memory-path-utils.ts | 1 + src/hooks/pre-tool-use.ts | 81 +++- src/hooks/query-cache.ts | 11 +- src/hooks/session-start.ts | 137 +++++- src/hooks/spawn-wiki-worker.ts | 40 +- src/hooks/upload-summary.ts | 6 +- src/tools/backfill-locomo-memory.ts | 403 ++++++++++++++++++ src/tools/smoke-summary-bm25.ts | 
54 +++ src/utils/retrieval-mode.ts | 5 + 38 files changed, 2600 insertions(+), 138 deletions(-) create mode 100644 claude-code/tests/summary-format.test.ts create mode 100644 src/tools/backfill-locomo-memory.ts create mode 100644 src/tools/smoke-summary-bm25.ts diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 82a4aac..a046850 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -290,6 +290,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -542,7 +565,9 @@ import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, appendFileSyn import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; var HOME = homedir4(); var WIKI_LOG = join5(HOME, ".claude", "hooks", "deeplake-wiki.log"); -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. 
Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -556,40 +581,50 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format. The header fields (Source, Project) are pre-filled \u2014 copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. 
Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. The Source and Project fields above are already correct \u2014 do not change them. 
@@ -892,11 +927,12 @@ function sleep2(ms) { } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync5, readFileSync as readFileSync5, rmSync as rmSync2, writeFileSync as writeFileSync5 } from "node:fs"; +import { mkdirSync as mkdirSync5, readFileSync as readFileSync5, rmSync as rmSync2, statSync as statSync2, writeFileSync as writeFileSync5 } from "node:fs"; import { join as join7 } from "node:path"; import { homedir as homedir6 } from "node:os"; var log3 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join7(homedir6(), ".deeplake", "query-cache"); +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join7(cacheRoot, sessionId); diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index ff5e179..3547fd0 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -465,6 +465,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 12b65db..f6a9b1a 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -450,6 +450,10 @@ function isSummaryBm25Disabled() { const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); @@ -1803,6 +1807,139 @@ function parseFindSpec(tokens) { } return patterns.length > 0 ? { patterns, execGrepCmd: null } : null; } +function extractPsqlQuery(tokens) { + let query = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? 
null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} +function extractPsqlQueryFromCommand(cmd) { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") + return null; + return extractPsqlQuery(tokens); +} +function normalizeSqlRef(ref) { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} +function extractSqlTableRefs(query) { + const refs = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) + refs.push(normalizeSqlRef(match[1])); + } + return refs; +} +function queryReferencesInterceptedTables(query) { + return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); +} +function queryUsesOnlyInterceptedTables(query) { + const refs = extractSqlTableRefs(query); + return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); +} +function parsePsqlSegment(pipeline, tokens) { + if (tokens[0] !== "psql" || !isPsqlMode()) + return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? 
fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) + tuplesOnly = true; + continue; + } + } + if (!query || !queryUsesOnlyInterceptedTables(query)) + return null; + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} +function normalizePsqlQuery(query, memoryTable, sessionsTable) { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + if (/\b__hivemind_sessions_text\b/i.test(sql)) { + const cte = `"__hivemind_sessions_text" AS 
(SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; + sql = /^\s*with\b/i.test(sql) ? sql.replace(/^\s*with\b/i, `WITH ${cte},`) : `WITH ${cte} ${sql}`; + } + return sql; +} +function validatePsqlQuery(query, memoryTable, sessionsTable) { + if (!queryUsesOnlyInterceptedTables(query)) { + throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + } + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new Error("psql query must reference memory or sessions"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} +function formatPsqlValue(value) { + if (value === null || value === void 0) + return ""; + if (typeof value === "string") + return value; + if (typeof value === "number" || typeof value === "boolean") + return String(value); + return JSON.stringify(value); +} +function formatPsqlRows(rows, tuplesOnly, fieldSeparator) { + if (rows.length === 0) + return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? 
{}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) + return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); +} function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); if (hasUnsupportedRedirection(clean)) @@ -1813,6 +1950,9 @@ function parseCompiledSegment(segment) { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) + return psqlSegment; if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -2083,6 +2223,14 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, outputs.push(limited.join("\n") || "(no matches)"); continue; } + if (segment.kind === "psql") { + const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); + const rows = await api.query(sql); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? 
formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) @@ -2209,6 +2357,7 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", @@ -2320,6 +2469,27 @@ function rewritePaths(cmd) { } // dist/src/hooks/pre-tool-use.js +function isAnyPsqlCommand(cmd) { + return /^\s*psql\b/.test(cmd.trim()); +} +function isHivemindPsqlCommand(cmd) { + if (!isPsqlMode()) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} +function needsHivemindPsqlRewrite(cmd) { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} +function buildPsqlOnlyGuidance() { + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the memory and sessions tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} +function buildPsqlSchemaGuidance() { + return "[RETRY REQUIRED] Only psql SELECT queries over memory and sessions are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; +} var log4 = (msg) => log("pre", msg); var __bundleDir = dirname(fileURLToPath2(import.meta.url)); var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? 
join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); @@ -2338,6 +2508,8 @@ function getShellCommand(toolName, toolInput) { switch (toolName) { case "Grep": { const p = toolInput.path; + if (isPsqlMode() && p && touchesMemory(p)) + return null; if (p && touchesMemory(p)) { const pattern = toolInput.pattern ?? ""; const flags = ["-r"]; @@ -2351,6 +2523,8 @@ function getShellCommand(toolName, toolInput) { } case "Read": { const fp = getReadTargetPath(toolInput); + if (isPsqlMode() && fp && touchesMemory(fp)) + return null; if (fp && touchesMemory(fp)) { const rewritten = rewritePaths(fp) || "/"; return `${isLikelyDirectoryPath(rewritten) ? "ls" : "cat"} ${rewritten}`; @@ -2359,7 +2533,13 @@ function getShellCommand(toolName, toolInput) { } case "Bash": { const cmd = toolInput.command; - if (!cmd || !touchesMemory(cmd)) + if (!cmd) + break; + if (isHivemindPsqlCommand(cmd)) + return cmd.trim(); + if (isPsqlMode() && (touchesMemory(cmd) || needsHivemindPsqlRewrite(cmd))) + return null; + if (!touchesMemory(cmd)) break; const rewritten = rewritePaths(cmd); if (!isSafe(rewritten)) { @@ -2370,6 +2550,8 @@ function getShellCommand(toolName, toolInput) { } case "Glob": { const p = toolInput.path; + if (isPsqlMode() && p && touchesMemory(p)) + return null; if (p && touchesMemory(p)) return "ls /"; break; @@ -2408,15 +2590,20 @@ async function processPreToolUse(input, deps = {}) { const cmd = input.tool_input.command ?? ""; const shellCmd = getShellCommand(input.tool_name, input.tool_input); const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; - if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { - const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. 
You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + const psqlRewriteNeeded = needsHivemindPsqlRewrite(cmd); + if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath) || psqlRewriteNeeded)) { + const guidance = isPsqlMode() ? psqlRewriteNeeded ? buildPsqlSchemaGuidance() : buildPsqlOnlyGuidance() : `[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'.`; logFn(`unsupported command, returning guidance: ${cmd}`); - return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); + return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, isPsqlMode() ? "[DeepLake SQL] unsupported command \u2014 rewrite using psql over memory/sessions" : "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); } if (!shellCmd) return null; - if (!config) + if (!config) { + if (isHivemindPsqlCommand(shellCmd)) { + return buildAllowDecision(`echo ${JSON.stringify("[RETRY REQUIRED] Hivemind SQL mode is unavailable because Deeplake credentials are missing.")}`, "[DeepLake SQL] unavailable"); + } return buildFallbackDecision(shellCmd, shellBundle); + } const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? 
"sessions"; const api = createApi(table, config); @@ -2595,6 +2782,9 @@ async function processPreToolUse(input, deps = {}) { } catch (e) { logFn(`direct query failed, falling back to shell: ${e.message}`); } + if (isHivemindPsqlCommand(shellCmd)) { + return buildAllowDecision(`echo ${JSON.stringify("[RETRY REQUIRED] Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.")}`, "[DeepLake SQL] query rewrite required"); + } return buildFallbackDecision(shellCmd, shellBundle); } async function main() { diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index 944977c..c1c98c0 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -290,6 +290,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -423,7 +446,9 @@ import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSyn import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; var HOME = homedir3(); var WIKI_LOG = join4(HOME, ".claude", "hooks", "deeplake-wiki.log"); -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -437,40 +462,50 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format. 
The header fields (Source, Project) are pre-filled \u2014 copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. 
The Source and Project fields above are already correct \u2014 do not change them. diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index 77621bc..73357f5 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -302,6 +302,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index ea84c9c..54a1b42 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -70,6 +70,20 @@ function isDirectRun(metaUrl) { } } +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? 
""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/hooks/version-check.js import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; import { dirname, join as join3 } from "node:path"; @@ -170,10 +184,135 @@ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) \u2014 personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) \u2014 shared org memory, currently exposed in SESSIONS-ONLY mode for benchmark comparison + +Deeplake memory structure available in this mode: +- ~/.deeplake/memory/sessions/{author}/* \u2014 raw session data + +SEARCH STRATEGY: Search raw session files directly. In this mode, do NOT start with index.md or summaries and do NOT assume those paths exist. +Open the most likely session file directly before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. 
+TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use that session's date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." 
+ +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management \u2014 each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login \u2014 SSO login +- node "HIVEMIND_AUTH_CMD" whoami \u2014 show current user/org +- node "HIVEMIND_AUTH_CMD" org list \u2014 list organizations +- node "HIVEMIND_AUTH_CMD" org switch \u2014 switch organization +- node "HIVEMIND_AUTH_CMD" workspaces \u2014 list workspaces +- node "HIVEMIND_AUTH_CMD" workspace \u2014 switch workspace +- node "HIVEMIND_AUTH_CMD" invite \u2014 invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members \u2014 list members +- node "HIVEMIND_AUTH_CMD" remove \u2014 remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) \u2014 personal per-project notes +2. 
Deeplake global memory (~/.deeplake/memory/) \u2014 global memory shared across all sessions, users, and agents in the org + +Deeplake memory structure in this mode: +- ~/.deeplake/memory/summaries/username/*.md \u2014 AI-generated wiki summaries per session +- ~/.deeplake/memory/sessions/{author}/* \u2014 raw session data (last resort) + +INDEX MODE: /index.md is intentionally unavailable for this run. Do NOT try to read it or rely on it. +SEARCH STRATEGY: Start by grepping summaries for the named person, topic, or keyword. Then read the specific matching summaries. Only read raw session files if the summaries don't have enough detail. Do NOT jump straight to raw session files. +If a summary points to a likely source session, open that exact raw session before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file for the named person when the summary is ambiguous. 
If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." + +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management \u2014 each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login \u2014 SSO login +- node "HIVEMIND_AUTH_CMD" whoami \u2014 show current user/org +- node "HIVEMIND_AUTH_CMD" org list \u2014 list organizations +- node "HIVEMIND_AUTH_CMD" org switch \u2014 switch organization +- node "HIVEMIND_AUTH_CMD" workspaces \u2014 list workspaces +- node "HIVEMIND_AUTH_CMD" workspace \u2014 switch workspace +- node "HIVEMIND_AUTH_CMD" invite \u2014 invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members \u2014 list members +- node "HIVEMIND_AUTH_CMD" remove \u2014 remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. 
If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions_text(path, creation_date, message_text) +- sessions(path, message, creation_date) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with targeted SELECTs against memory to find likely summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. +4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions_text by exact path for those candidate sessions. +5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. +8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. 
+ +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions_text row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. 
+- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +IMPORTANT: Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. + Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; function buildSessionStartAdditionalContext(args) { - const resolvedContext = CLAUDE_SESSION_START_CONTEXT.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); + const template = isPsqlMode() ? CLAUDE_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX : CLAUDE_SESSION_START_CONTEXT; + const resolvedContext = template.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); let updateNotice = ""; if (args.currentVersion) { if (args.latestVersion && isNewer(args.latestVersion, args.currentVersion)) { @@ -236,6 +375,9 @@ if (isDirectRun(import.meta.url)) { } export { CLAUDE_SESSION_START_CONTEXT, + CLAUDE_SESSION_START_CONTEXT_NO_INDEX, + CLAUDE_SESSION_START_CONTEXT_PSQL, + CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY, buildSessionStartAdditionalContext, runSessionStartHook }; diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index cd53b4e..f280a41 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -94,12 +94,60 @@ function releaseLock(sessionId) { // dist/src/hooks/upload-summary.js import { randomUUID } from "node:crypto"; + +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + 
const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? ""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} + +// dist/src/hooks/upload-summary.js function esc(s) { return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } function extractDescription(text) { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? 
match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } async function uploadSummary(query2, params) { const { tableName, vpath, fname, userName, project, agent, text } = params; diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index e6d49be..78d9502 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -10,6 +10,13 @@ import { tokenizeShellWords, } from "../../src/hooks/bash-command-compiler.js"; +const originalPsqlMode = process.env.HIVEMIND_PSQL_MODE; + +function restorePsqlMode(): void { + if (originalPsqlMode === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = originalPsqlMode; +} + describe("bash-command-compiler parsing", () => { it("splits top-level sequences while respecting quotes", () => { expect(splitTopLevel("cat /a && echo 'x && y' ; ls /b", ["&&", ";"])).toEqual([ @@ -74,6 +81,7 @@ describe("bash-command-compiler parsing", () => { }); it("parses supported read-only segments", () => { + restorePsqlMode(); expect(parseCompiledSegment("echo ---")).toEqual({ kind: "echo", text: "---" }); expect(parseCompiledSegment("cat /a /b | head -2")).toEqual({ kind: "cat", @@ -280,7 +288,40 @@ describe("bash-command-compiler parsing", () => { }); }); + it("parses psql segments only when psql mode is enabled", () => { + delete process.env.HIVEMIND_PSQL_MODE; + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\"")).toBeNull(); + + process.env.HIVEMIND_PSQL_MODE = "1"; + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\" | head -1")).toEqual({ + kind: "psql", + query: "SELECT path, summary FROM memory LIMIT 2", + lineLimit: 1, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 2\"")).toEqual({ + 
kind: "psql", + query: "SELECT path, summary FROM hivemind.memory LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + restorePsqlMode(); + }); + it("rejects unsupported segments and command shapes", () => { + process.env.HIVEMIND_PSQL_MODE = "1"; expect(parseCompiledSegment("cat")).toBeNull(); expect(parseCompiledSegment("echo ok > /x")).toBeNull(); expect(parseCompiledSegment("cat /a | jq '.x'")).toBeNull(); @@ -300,8 +341,10 @@ describe("bash-command-compiler parsing", () => { expect(parseCompiledSegment("find /summaries -name '*.md' | xargs grep -l foo | tail -2")).toBeNull(); expect(parseCompiledSegment("grep foo /a | tail -2")).toBeNull(); expect(parseCompiledSegment("grep foo /a | head nope")).toBeNull(); + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT * FROM memory\" | tail -2")).toBeNull(); expect(parseCompiledBashCommand("cat /a || cat /b")).toBeNull(); expect(parseCompiledBashCommand("cat /a && echo ok > /x")).toBeNull(); + restorePsqlMode(); }); }); @@ -467,6 +510,86 @@ describe("bash-command-compiler execution", () => { expect(output).toBeNull(); }); + it("executes psql queries against normalized memory and sessions table names", async () => { + const query = vi.fn(async (sql: string) => { + expect(sql).toContain('FROM "memory_actual"'); + expect(sql).toContain('JOIN "sessions_actual"'); + return [ + { path: "/summaries/locomo/conv_0_session_6_summary.md", summary: "Caroline keeps classic kids books" }, + ]; + }); + + process.env.HIVEMIND_PSQL_MODE = "1"; + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + 
"sessions_actual", + "psql -At -F '|' -c \"SELECT m.path, m.summary FROM memory m JOIN sessions s ON s.path = m.path WHERE m.summary ILIKE '%Caroline%' LIMIT 1\"", + ); + expect(output).toBe("/summaries/locomo/conv_0_session_6_summary.md|Caroline keeps classic kids books"); + restorePsqlMode(); + }); + + it("rewrites sessions_text queries into a text CTE over the backing sessions table", async () => { + const query = vi.fn(async (sql: string) => { + expect(sql).toContain('WITH "__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "sessions_actual")'); + expect(sql).toContain('FROM "__hivemind_sessions_text"'); + expect(sql).toContain("message_text ILIKE '%camp%'"); + return [ + { path: "/sessions/conv_0_session_8.json", creation_date: "2023-08-10", message_text: "{\"turns\":[{\"text\":\"We planned a camping trip\"}]}" }, + ]; + }); + + process.env.HIVEMIND_PSQL_MODE = "1"; + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + "sessions_actual", + "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 1\"", + ); + expect(output).toBe('/sessions/conv_0_session_8.json|2023-08-10|{"turns":[{"text":"We planned a camping trip"}]}'); + restorePsqlMode(); + }); + + it("matches psql tuples-only empty output semantics", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + const tuplesOnly = await executeCompiledBashCommand( + { query: vi.fn(async () => []) } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"SELECT path FROM memory WHERE summary ILIKE '%missing%'\"", + ); + expect(tuplesOnly).toBe(""); + + const withHeader = await executeCompiledBashCommand( + { query: vi.fn(async () => []) } as any, + "memory", + "sessions", + "psql -F '|' -c \"SELECT path FROM memory WHERE summary ILIKE '%missing%'\"", + ); + expect(withHeader).toBe("(0 rows)"); + restorePsqlMode(); + }); + + it("does not compile unrelated psql commands 
and rejects invalid hivemind writes", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + await expect(executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"DELETE FROM memory\"", + )).rejects.toThrow("psql mode only supports SELECT queries"); + + const unrelated = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql -At -F '|' -c \"SELECT * FROM users\"", + ); + expect(unrelated).toBeNull(); + restorePsqlMode(); + }); + it("compiles find | xargs grep -l | head into batched path reads", async () => { const findVirtualPathsFn = vi.fn() .mockResolvedValueOnce(["/summaries/a.md", "/summaries/shared.json"]) diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index f427bf7..306c2df 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -314,6 +314,29 @@ describe("DeeplakeApi.createIndex", () => { }); }); +describe("DeeplakeApi.createSummaryBm25Index", () => { + it("generates correct CREATE INDEX SQL for summary BM25", async () => { + mockFetch.mockResolvedValueOnce(jsonResponse({})); + const api = makeApi("memory"); + await api.createSummaryBm25Index(); + const sql = JSON.parse(mockFetch.mock.calls[0][1].body).query; + expect(sql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(sql).toContain("idx_memory_summary_bm25"); + expect(sql).toContain('ON "memory" USING deeplake_index ("summary")'); + }); +}); + +describe("DeeplakeApi.ensureSummaryBm25Index", () => { + it("creates the summary BM25 index when no fresh marker exists", async () => { + mockFetch.mockResolvedValueOnce(jsonResponse({})); + const api = makeApi("memory"); + await api.ensureSummaryBm25Index(); + const sql = JSON.parse(mockFetch.mock.calls[0][1].body).query; + expect(sql).toContain("CREATE INDEX IF NOT EXISTS"); + expect(sql).toContain("idx_memory_summary_bm25"); + }); +}); + // ── listTables 
────────────────────────────────────────────────────────────── describe("DeeplakeApi.listTables", () => { diff --git a/claude-code/tests/deeplake-fs.test.ts b/claude-code/tests/deeplake-fs.test.ts index 455b86a..1c564ea 100644 --- a/claude-code/tests/deeplake-fs.test.ts +++ b/claude-code/tests/deeplake-fs.test.ts @@ -47,12 +47,12 @@ function makeClient(seed: Record = {}) { } return []; } - // Virtual index: SELECT path, project, description, creation_date, last_update_date FROM ... WHERE path LIKE '/summaries/%' - if (sql.includes("SELECT path, project, description, creation_date, last_update_date")) { + // Virtual index: SELECT path, project, description, summary, creation_date FROM ... WHERE path LIKE '/summaries/%' + if (sql.includes("SELECT path, project, description, summary, creation_date")) { return rows .filter(r => r.path.startsWith("/summaries/")) .map(r => ({ - path: r.path, project: r.project, description: r.description, + path: r.path, project: r.project, description: r.description, summary: r.summary, creation_date: r.creation_date, last_update_date: r.last_update_date, })); } @@ -804,17 +804,50 @@ describe("virtual index.md", () => { it("generates virtual index when no /index.md row exists", async () => { const { fs } = await makeFsWithSummaries([ - { id: "aaa-111", userName: "alice", project: "my-project", description: "Fixed auth bug", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T11:00:00.000Z", content: "# Session aaa-111" }, - { id: "bbb-222", userName: "alice", project: "other-proj", description: "in progress", creationDate: "2026-04-07T12:00:00.000Z", lastUpdateDate: "2026-04-07T12:00:00.000Z", content: "# Session bbb-222" }, + { + id: "aaa-111", + userName: "alice", + project: "my-project", + description: "Fixed auth bug", + creationDate: "2026-04-07T10:00:00.000Z", + lastUpdateDate: "2026-04-07T11:00:00.000Z", + content: `# Session aaa-111 +- **Source**: /sessions/alice/aaa-111.jsonl +- **Date**: 2026-04-07 +- 
**Participants**: Alice, Bob +- **Topics**: auth, retries + +## Searchable Facts +- Auth tokens now refresh automatically. +`, + }, + { + id: "bbb-222", + userName: "alice", + project: "other-proj", + description: "in progress", + creationDate: "2026-04-07T12:00:00.000Z", + lastUpdateDate: "2026-04-07T12:00:00.000Z", + content: `# Session bbb-222 +- **Source**: /sessions/alice/bbb-222.jsonl +- **Date**: 2026-04-07 +- **Participants**: Alice, Carol +- **Topics**: rollout +`, + }, ]); const content = await fs.readFile("/index.md"); - expect(content).toContain("# Session Index"); - expect(content).toContain("| Session | Conversation | Created | Last Updated | Project | Description |"); + expect(content).toContain("# Memory Index"); + expect(content).toContain("## People"); + expect(content).toContain("## Projects"); + expect(content).toContain("## Summary To Session Catalog"); expect(content).toContain("aaa-111"); expect(content).toContain("bbb-222"); expect(content).toContain("my-project"); - expect(content).toContain("Fixed auth bug"); + expect(content).toContain("Alice, Bob"); + expect(content).toContain("[session](/sessions/alice/aaa-111.jsonl)"); expect(content).toContain("2026-04-07"); + expect(content).toContain("updated: 2026-04-07 11:00 UTC"); }); it("serves real /index.md row when it exists", async () => { @@ -847,6 +880,39 @@ describe("virtual index.md", () => { expect(s.isDirectory).toBe(false); }); + it("hides the virtual index in sessions-only mode", async () => { + const prev = process.env.HIVEMIND_SESSIONS_ONLY; + process.env.HIVEMIND_SESSIONS_ONLY = "1"; + try { + const { fs } = await makeFsWithSummaries([ + { id: "aaa-111", userName: "alice", project: "proj", description: "desc", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T10:00:00.000Z", content: "# Session" }, + ]); + expect(await fs.exists("/index.md")).toBe(false); + const entries = await fs.readdir("/"); + expect(entries).not.toContain("index.md"); + } finally { + if 
(prev === undefined) delete process.env.HIVEMIND_SESSIONS_ONLY; + else process.env.HIVEMIND_SESSIONS_ONLY = prev; + } + }); + + it("hides the virtual index when index is disabled but still keeps summaries", async () => { + const prev = process.env.HIVEMIND_DISABLE_INDEX; + process.env.HIVEMIND_DISABLE_INDEX = "1"; + try { + const { fs } = await makeFsWithSummaries([ + { id: "aaa-111", userName: "alice", project: "proj", description: "desc", creationDate: "2026-04-07T10:00:00.000Z", lastUpdateDate: "2026-04-07T10:00:00.000Z", content: "# Session" }, + ]); + expect(await fs.exists("/index.md")).toBe(false); + const entries = await fs.readdir("/"); + expect(entries).not.toContain("index.md"); + expect(entries).toContain("summaries"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_DISABLE_INDEX; + else process.env.HIVEMIND_DISABLE_INDEX = prev; + } + }); + it("virtual index shows all summary rows ordered", async () => { const { fs } = await makeFsWithSummaries([ { id: "old-session", userName: "alice", project: "proj-a", description: "Old work", creationDate: "2026-04-01T10:00:00.000Z", lastUpdateDate: "2026-04-01T11:00:00.000Z", content: "# Old" }, @@ -863,10 +929,8 @@ describe("virtual index.md", () => { it("virtual index handles empty summaries table", async () => { const { fs } = await makeFs({}, "/"); const content = await fs.readFile("/index.md"); - expect(content).toContain("# Session Index"); - expect(content).toContain("| Session | Conversation | Created | Last Updated | Project | Description |"); - // No data rows - const lines = content.split("\n").filter(l => l.startsWith("| [")); + expect(content).toContain("# Memory Index"); + const lines = content.split("\n").filter(l => l.startsWith("- [")); expect(lines.length).toBe(0); }); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index 4e576c4..a2b8ff9 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts 
@@ -282,6 +282,74 @@ describe("claude pre-tool source", () => { }); }); + it("passes through psql bash commands when sql mode is enabled", () => { + const prev = process.env.HIVEMIND_PSQL_MODE; + process.env.HIVEMIND_PSQL_MODE = "1"; + try { + expect(getShellCommand("Bash", { + command: "psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 1\"", + })).toBe("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 1\""); + expect(getShellCommand("Bash", { + command: "psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 1\"", + })).toBe("psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 1\""); + expect(getShellCommand("Bash", { + command: "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text LIMIT 1\"", + })).toBe("psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text LIMIT 1\""); + expect(getShellCommand("Read", { file_path: "~/.deeplake/memory/index.md" })).toBeNull(); + expect(getShellCommand("Glob", { path: "~/.deeplake/memory/summaries" })).toBeNull(); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prev; + } + }); + + it("passes through memory/session psql queries and leaves unrelated psql untouched", async () => { + const prev = process.env.HIVEMIND_PSQL_MODE; + process.env.HIVEMIND_PSQL_MODE = "1"; + try { + const decision = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 1\"", + }, + tool_use_id: "tu-psql-memory", + }, { + config: baseConfig, + executeCompiledBashCommandFn: vi.fn(async () => "/summaries/locomo/conv_0_session_1_summary.md|summary") as any, + }); + expect(decision?.command).toContain("summary"); + + const passthrough = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "psql -At -F '|' -c \"SELECT * FROM users LIMIT 1\"", + }, + 
tool_use_id: "tu-psql-pass", + }, { + config: baseConfig, + }); + expect(passthrough).toBeNull(); + + const sessionsText = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 1\"", + }, + tool_use_id: "tu-psql-sessions-text", + }, { + config: baseConfig, + executeCompiledBashCommandFn: vi.fn(async () => "/sessions/conv_0_session_8.json|2023-08-10|{\"turns\":[{\"text\":\"We planned a camping trip\"}]}") as any, + }); + expect(sessionsText?.command).toContain("camping trip"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prev; + } + }); + it("returns guidance for unsupported memory commands and passthrough for non-memory commands", async () => { const guidance = await processPreToolUse({ session_id: "s1", @@ -560,6 +628,33 @@ describe("claude pre-tool source", () => { expect(decision?.command).toContain("compiled output"); expect(decision?.description).toContain("DeepLake compiled"); }); + + it("routes supported psql benchmark commands through the compiled path", async () => { + const prev = process.env.HIVEMIND_PSQL_MODE; + process.env.HIVEMIND_PSQL_MODE = "1"; + try { + const decision = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { + command: "psql -At -F '|' -c \"SELECT path, summary FROM memory WHERE summary ILIKE '%Caroline%' LIMIT 1\"", + }, + tool_use_id: "tu-psql-1", + }, { + config: baseConfig, + executeCompiledBashCommandFn: vi.fn(async (_api, _table, _sessions, cmd) => { + expect(cmd).toBe("psql -At -F '|' -c \"SELECT path, summary FROM memory WHERE summary ILIKE '%Caroline%' LIMIT 1\""); + return "/summaries/locomo/conv_0_session_6_summary.md|Caroline keeps classic kids books"; + }) as any, + }); + + expect(decision?.command).toContain("classic kids books"); + 
expect(decision?.description).toContain("DeepLake compiled"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prev; + } + }); }); describe("claude session start source", () => { @@ -648,6 +743,32 @@ describe("claude session start source", () => { } }); + it("switches to sql guidance when psql mode is enabled", () => { + const prev = process.env.HIVEMIND_PSQL_MODE; + process.env.HIVEMIND_PSQL_MODE = "1"; + try { + const context = buildSessionStartAdditionalContext({ + authCommand: "/tmp/auth-login.js", + creds: baseCreds, + currentVersion: null, + latestVersion: null, + }); + expect(context).toContain("DEEPLAKE MEMORY SQL MODE"); + expect(context).toContain("memory(path, summary"); + expect(context).toContain("sessions_text(path, creation_date, message_text)"); + expect(context).toContain("sessions(path, message"); + expect(context).toContain("psql -At -F '|'"); + expect(context).toContain("Use sessions only when you need the raw structured payload"); + expect(context).toContain("Do NOT filter sessions.message directly"); + expect(context).toContain("Do not use filesystem commands"); + expect(context).not.toContain("Always read index.md first"); + expect(context).not.toContain("~/.deeplake/memory"); + } finally { + if (prev === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prev; + } + }); + it("logs authenticated startup without backfilling when the username is already present", async () => { const logFn = vi.fn(); const save = vi.fn(); diff --git a/claude-code/tests/query-cache.test.ts b/claude-code/tests/query-cache.test.ts index 84f62a9..b14c08b 100644 --- a/claude-code/tests/query-cache.test.ts +++ b/claude-code/tests/query-cache.test.ts @@ -13,6 +13,7 @@ describe("query-cache", () => { const tempRoots: string[] = []; afterEach(() => { + vi.useRealTimers(); for (const root of tempRoots.splice(0)) { rmSync(root, { recursive: true, force: true }); } 
@@ -65,4 +66,16 @@ describe("query-cache", () => { }); expect(logFn).toHaveBeenCalledWith(expect.stringContaining("clear failed")); }); + + it("drops stale cached content instead of reusing it across long gaps", () => { + const cacheRoot = mkdtempSync(join(tmpdir(), "hivemind-query-cache-")); + tempRoots.push(cacheRoot); + + writeCachedIndexContent("session-3", "cached once", { cacheRoot }); + vi.useFakeTimers(); + vi.setSystemTime(Date.now() + (16 * 60 * 1000)); + + expect(readCachedIndexContent("session-3", { cacheRoot })).toBeNull(); + expect(readCachedIndexContent("session-3", { cacheRoot })).toBeNull(); + }); }); diff --git a/claude-code/tests/session-start.test.ts b/claude-code/tests/session-start.test.ts index 858f544..0b9ce43 100644 --- a/claude-code/tests/session-start.test.ts +++ b/claude-code/tests/session-start.test.ts @@ -148,6 +148,15 @@ describe("claude-code integration: session-start.js (sync hook)", () => { expect(ctx).toContain("convert the final answer into an absolute month/date/year"); }); + it("switches to summary-first guidance when index is disabled", () => { + const raw = runHook("session-start.js", baseInput, { HIVEMIND_DISABLE_INDEX: "1" }); + const parsed = JSON.parse(raw); + const ctx = parsed.hookSpecificOutput.additionalContext; + expect(ctx).toContain("/index.md is intentionally unavailable"); + expect(ctx).toContain("Start by grepping summaries"); + expect(ctx).not.toContain("Always read index.md first"); + }); + it("completes within 3s with no credentials (no server calls)", () => { const start = Date.now(); runHook("session-start.js", baseInput); diff --git a/claude-code/tests/session-summary.test.ts b/claude-code/tests/session-summary.test.ts index 09f123a..cc776f5 100644 --- a/claude-code/tests/session-summary.test.ts +++ b/claude-code/tests/session-summary.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { DeeplakeFs, guessMime } from "../../src/shell/deeplake-fs.js"; +import { 
extractDescription } from "../../src/hooks/upload-summary.js"; // ── Mock client (same pattern as deeplake-fs.test.ts) ──────────────────────── type Row = { @@ -33,11 +34,11 @@ function makeClient(seed: Record = {}) { const row = match ? rows.find(r => r.path === match[1]) : undefined; return row ? [{ summary: row.summary }] : []; } - if (sql.includes("SELECT path, project, description, creation_date, last_update_date")) { + if (sql.includes("SELECT path, project, description, summary, creation_date")) { return rows .filter(r => r.path.startsWith("/summaries/")) .map(r => ({ - path: r.path, project: r.project, description: r.description, + path: r.path, project: r.project, description: r.description, summary: r.summary, creation_date: r.creation_date, last_update_date: r.last_update_date, })); } @@ -186,7 +187,7 @@ async function uploadSummary( const summaryPath = `/summaries/${userName}/${sessionId}.md`; await fs.writeFileWithMeta(summaryPath, summaryContent, { project: projectName, - description: summaryContent.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/)?.[1]?.trim().slice(0, 80) ?? "completed", + description: extractDescription(summaryContent), lastUpdateDate: new Date().toISOString(), }); await fs.flush(); @@ -302,7 +303,7 @@ describe("session summary — resumed sessions update last_update_date", () => { // last_update_date must have changed expect(rowAfterEnd.last_update_date).not.toBe(initialDate); // description must be extracted from What Happened section - expect(rowAfterEnd.description).toBe("Fixed authentication bug in the login flow. 
Added retry logic for token refresh."); + expect(rowAfterEnd.description).toBe("Auth tokens now refresh automatically"); // content must be the full summary expect(rowAfterEnd.summary).toContain("## What Happened"); expect(rowAfterEnd.summary).toContain("## Key Facts"); diff --git a/claude-code/tests/summary-format.test.ts b/claude-code/tests/summary-format.test.ts new file mode 100644 index 0000000..afa7f9c --- /dev/null +++ b/claude-code/tests/summary-format.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from "vitest"; +import { + buildSummaryBlurb, + buildSummaryIndexLine, + extractHeaderField, + extractSection, + extractSummaryDate, + extractSummaryParticipants, + extractSummarySource, + extractSummaryTopics, +} from "../../src/utils/summary-format.js"; + +const SUMMARY = `# Session conv_0_session_10 +- **Source**: /sessions/conv_0_session_10.json +- **Date**: 8:56 pm on 20 July, 2023 +- **Participants**: Caroline, Melanie +- **Project**: locomo +- **Topics**: LGBTQ activism, family summer traditions + +## What Happened +Caroline and Melanie talked about activism, family trips, and a recent child milestone. + +## Searchable Facts +- Caroline joined Connected LGBTQ Activists last Tuesday. +- Melanie's family takes an annual summer camping trip. +- Melanie's youngest child recently took her first steps. 
+`; + +describe("summary-format", () => { + it("extracts header fields and sections", () => { + expect(extractHeaderField(SUMMARY, "Source")).toBe("/sessions/conv_0_session_10.json"); + expect(extractSection(SUMMARY, "What Happened")).toContain("activism"); + }); + + it("extracts common summary metadata", () => { + expect(extractSummaryDate(SUMMARY)).toBe("8:56 pm on 20 July, 2023"); + expect(extractSummaryParticipants(SUMMARY)).toBe("Caroline, Melanie"); + expect(extractSummaryTopics(SUMMARY)).toBe("LGBTQ activism, family summer traditions"); + expect(extractSummarySource(SUMMARY)).toBe("/sessions/conv_0_session_10.json"); + }); + + it("builds a searchable blurb from participants, topics, and facts", () => { + const blurb = buildSummaryBlurb(SUMMARY); + expect(blurb).toContain("Caroline, Melanie"); + expect(blurb).toContain("Connected LGBTQ Activists"); + expect(blurb).not.toContain("## Searchable Facts"); + }); + + it("builds an index line with source and metadata", () => { + const line = buildSummaryIndexLine({ + path: "/summaries/locomo/conv_0_session_10_summary.md", + project: "locomo", + description: "fallback description", + summary: SUMMARY, + creation_date: "2026-04-18T00:00:00.000Z", + last_update_date: "2026-04-18T13:45:00.000Z", + }); + + expect(line).toContain("conv_0_session_10_summary.md"); + expect(line).toContain("8:56 pm on 20 July, 2023"); + expect(line).toContain("Caroline, Melanie"); + expect(line).toContain("[session](/sessions/conv_0_session_10.json)"); + expect(line).toContain("updated: 2026-04-18 13:45 UTC"); + }); + + it("returns null for rows without a path", () => { + expect(buildSummaryIndexLine({})).toBeNull(); + }); +}); diff --git a/claude-code/tests/upload-summary.test.ts b/claude-code/tests/upload-summary.test.ts index 56eb0e9..7d836b3 100644 --- a/claude-code/tests/upload-summary.test.ts +++ b/claude-code/tests/upload-summary.test.ts @@ -24,6 +24,22 @@ All ten commands executed successfully. 
**test-project** (directory) — working directory `; +const TEXT_WITH_STRUCTURED_FACTS = `# Session conv_0_session_10 +- **Source**: /sessions/conv_0_session_10.json +- **Date**: 8:56 pm on 20 July, 2023 +- **Participants**: Caroline, Melanie +- **Project**: locomo +- **Topics**: LGBTQ activism, family summer traditions + +## What Happened +Caroline and Melanie talked about activism, family trips, and recent milestones. + +## Searchable Facts +- Caroline joined Connected LGBTQ Activists last Tuesday. +- Melanie's family takes an annual summer camping trip. +- Melanie's youngest child recently took her first steps. +`; + function makeSpyQuery(responses: Array>> = [[]]): { fn: QueryFn; calls: string[] } { const calls: string[] = []; let i = 0; @@ -134,12 +150,20 @@ describe("uploadSummary — Deeplake single-UPDATE invariant", () => { }); describe("extractDescription", () => { - it("extracts the What Happened section trimmed to 300 chars", () => { + it("falls back to the What Happened section when no richer structure exists", () => { const d = extractDescription(TEXT_WITH_WHAT_HAPPENED); expect(d.startsWith("User ran diagnostic commands")).toBe(true); expect(d.length).toBeLessThanOrEqual(300); }); + it("prefers participants, topics, and searchable facts when present", () => { + const d = extractDescription(TEXT_WITH_STRUCTURED_FACTS); + expect(d).toContain("Caroline, Melanie"); + expect(d).toContain("LGBTQ activism, family summer traditions"); + expect(d).toContain("Connected LGBTQ Activists"); + expect(d).not.toContain("## Searchable Facts"); + }); + it("returns 'completed' when the section is absent", () => { expect(extractDescription("# Only header, nothing else.")).toBe("completed"); }); diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 764460e..261697c 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -207,7 +207,9 @@ import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSyn import { homedir as homedir4, 
tmpdir } from "node:os"; var HOME = homedir4(); var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -221,39 +223,49 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. 
+IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. LENGTH LIMIT: Keep the total summary under 4000 characters.`; function wikiLog(msg) { @@ -346,11 +358,12 @@ function extractSessionId(sessionPath) { } // dist/src/hooks/query-cache.js -import { mkdirSync as mkdirSync4, readFileSync as readFileSync4, rmSync as rmSync2, writeFileSync as writeFileSync4 } from "node:fs"; +import { mkdirSync as mkdirSync4, readFileSync as readFileSync4, rmSync as rmSync2, statSync as statSync2, writeFileSync as writeFileSync4 } from "node:fs"; import { join as join6 } from "node:path"; import { homedir as homedir6 } from "node:os"; var log2 = (msg) => log("query-cache", msg); var DEFAULT_CACHE_ROOT = join6(homedir6(), ".deeplake", "query-cache"); +var INDEX_CACHE_TTL_MS = 15 * 60 * 1e3; function getSessionQueryCacheDir(sessionId, deps = {}) { const { cacheRoot = DEFAULT_CACHE_ROOT } = deps; return join6(cacheRoot, sessionId); diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index ff5e179..3547fd0 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -465,6 +465,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING 
deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 8d1aad6..48ba30b 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -437,6 +437,10 @@ function isSummaryBm25Disabled() { const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); @@ -1790,6 +1794,139 @@ function parseFindSpec(tokens) { } return patterns.length > 0 ? 
{ patterns, execGrepCmd: null } : null; } +function extractPsqlQuery(tokens) { + let query = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} +function extractPsqlQueryFromCommand(cmd) { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") + return null; + return extractPsqlQuery(tokens); +} +function normalizeSqlRef(ref) { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} +function extractSqlTableRefs(query) { + const refs = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) + refs.push(normalizeSqlRef(match[1])); + } + return refs; +} +function queryReferencesInterceptedTables(query) { + return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); +} +function queryUsesOnlyInterceptedTables(query) { + const refs = extractSqlTableRefs(query); + return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); +} +function parsePsqlSegment(pipeline, tokens) { + if (tokens[0] !== "psql" || !isPsqlMode()) + return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? 
fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) + tuplesOnly = true; + continue; + } + } + if (!query || !queryUsesOnlyInterceptedTables(query)) + return null; + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) + return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) + return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) + return null; + lineLimit = headTail.lineLimit; + } + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} +function normalizePsqlQuery(query, memoryTable, sessionsTable) { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + if (/\b__hivemind_sessions_text\b/i.test(sql)) { + const cte = `"__hivemind_sessions_text" AS 
(SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; + sql = /^\s*with\b/i.test(sql) ? sql.replace(/^\s*with\b/i, `WITH ${cte},`) : `WITH ${cte} ${sql}`; + } + return sql; +} +function validatePsqlQuery(query, memoryTable, sessionsTable) { + if (!queryUsesOnlyInterceptedTables(query)) { + throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + } + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new Error("psql query must reference memory or sessions"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} +function formatPsqlValue(value) { + if (value === null || value === void 0) + return ""; + if (typeof value === "string") + return value; + if (typeof value === "number" || typeof value === "boolean") + return String(value); + return JSON.stringify(value); +} +function formatPsqlRows(rows, tuplesOnly, fieldSeparator) { + if (rows.length === 0) + return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? 
{}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) + return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); +} function parseCompiledSegment(segment) { const { clean, ignoreMissing } = stripAllowedModifiers(segment); if (hasUnsupportedRedirection(clean)) @@ -1800,6 +1937,9 @@ function parseCompiledSegment(segment) { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) + return psqlSegment; if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -2070,6 +2210,14 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, outputs.push(limited.join("\n") || "(no matches)"); continue; } + if (segment.kind === "psql") { + const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); + const rows = await api.query(sql); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? 
formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) @@ -2210,6 +2358,7 @@ var SAFE_BUILTINS = /* @__PURE__ */ new Set([ "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", @@ -2321,11 +2470,32 @@ function rewritePaths(cmd) { } // dist/src/hooks/codex/pre-tool-use.js +function isAnyPsqlCommand(cmd) { + return /^\s*psql\b/.test(cmd.trim()); +} +function isHivemindPsqlCommand(cmd) { + if (!isPsqlMode()) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} +function needsHivemindPsqlRewrite(cmd) { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) + return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} var log4 = (msg) => log("codex-pre", msg); var __bundleDir = dirname(fileURLToPath2(import.meta.url)); var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js"); function buildUnsupportedGuidance() { - return "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry."; + return `This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". Do NOT use python, python3, node, curl, or other interpreters. 
Rewrite your command using only bash tools and retry.`; +} +function buildPsqlOnlyGuidance() { + return "Hivemind recall is SQL-only in this mode. Use psql with the memory and sessions tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} +function buildPsqlSchemaGuidance() { + return "Only psql SELECT queries over memory and sessions are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; } function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { try { @@ -2344,11 +2514,28 @@ async function processCodexPreToolUse(input, deps = {}) { const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input?.command ?? ""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd)) + if (!touchesMemory(cmd) && !isAnyPsqlCommand(cmd)) + return { action: "pass" }; + if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { + if (needsHivemindPsqlRewrite(cmd)) { + return { + action: "guide", + output: buildPsqlSchemaGuidance(), + rewrittenCommand: cmd.trim() + }; + } return { action: "pass" }; - const rewritten = rewritePaths(cmd); + } + if (isPsqlMode() && touchesMemory(cmd)) { + return { + action: "guide", + output: buildPsqlOnlyGuidance(), + rewrittenCommand: cmd.trim() + }; + } + const rewritten = isHivemindPsqlCommand(cmd) ? 
cmd.trim() : rewritePaths(cmd); if (!isSafe(rewritten)) { - const guidance = buildUnsupportedGuidance(); + const guidance = isPsqlMode() ? buildPsqlOnlyGuidance() : buildUnsupportedGuidance(); logFn(`unsupported command, returning guidance: ${rewritten}`); return { action: "guide", @@ -2356,6 +2543,13 @@ async function processCodexPreToolUse(input, deps = {}) { rewrittenCommand: rewritten }; } + if (isHivemindPsqlCommand(rewritten) && !config) { + return { + action: "guide", + output: "Hivemind SQL mode is unavailable because Deeplake credentials are missing.", + rewrittenCommand: rewritten + }; + } if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; @@ -2525,8 +2719,22 @@ async function processCodexPreToolUse(input, deps = {}) { } } catch (e) { logFn(`direct query failed, falling back to shell: ${e.message}`); + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten + }; + } } } + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. 
Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten + }; + } logFn(`intercepted \u2192 running via virtual shell: ${rewritten}`); const result = runVirtualShellFn(rewritten, shellBundle, logFn); return { @@ -2556,6 +2764,8 @@ if (isDirectRun(import.meta.url)) { }); } export { + buildPsqlOnlyGuidance, + buildPsqlSchemaGuidance, buildUnsupportedGuidance, isSafe, processCodexPreToolUse, diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index e13a5e2..f6d17c7 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -299,6 +299,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index bb3ebd0..9ab2ac7 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -66,6 +66,20 @@ function isDirectRun(metaUrl) { } } +// dist/src/utils/retrieval-mode.js +function isSessionsOnlyMode() { + const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isIndexDisabled() { + const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} +function isPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} + // dist/src/hooks/version-check.js import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; import { dirname, join as join3 } from "node:path"; @@ -117,11 +131,90 @@ RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. 
Do NOT spawn subagents to read deeplake memory.`; +var CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +SESSIONS-ONLY mode is active for benchmark comparison. Available Deeplake recall paths are raw session files under sessions/{author}/*. +Do NOT start with index.md or summaries in this mode, and do NOT assume those paths exist. +Open the most likely session file directly before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. 
+SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; +var CODEX_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +Structure in this mode: summaries/*.md \u2192 sessions/{author}/* (last resort). /index.md is intentionally unavailable, so do NOT read it or rely on it. +Start by grepping summaries for the named person, topic, or keyword. Then open the specific matching summaries. Only read raw session files if summaries do not contain the exact detail. +If a summary points to a likely source session, read that exact raw session before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. 
+ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; +var CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. 
+ +Available tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions_text(path, creation_date, message_text) +- sessions(path, message, creation_date) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Query memory first to identify likely summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Re-query memory by exact path for the small set of summary rows you selected. +4. Query sessions_text by exact path for transcript evidence or unresolved dates. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. +8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. 
+ +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against session metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions_text row. +- Preserve direct relative-duration answers when they already match the question. +- Aggregate across the small candidate set before answering profile or list questions. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. 
+ +Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; function buildCodexSessionStartContext(args) { const versionNotice = args.currentVersion ? ` Hivemind v${args.currentVersion}` : ""; - return args.creds?.token ? `${CODEX_SESSION_START_CONTEXT} -Logged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` : `${CODEX_SESSION_START_CONTEXT} + const template = isPsqlMode() ? CODEX_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CODEX_SESSION_START_CONTEXT_NO_INDEX : CODEX_SESSION_START_CONTEXT; + return args.creds?.token ? `${template} +Logged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` : `${template} Not logged in to Deeplake. Run: node "${args.authCommand}" login${versionNotice}`; } async function runCodexSessionStartHook(input, deps = {}) { @@ -163,6 +256,9 @@ if (isDirectRun(import.meta.url)) { } export { CODEX_SESSION_START_CONTEXT, + CODEX_SESSION_START_CONTEXT_NO_INDEX, + CODEX_SESSION_START_CONTEXT_PSQL, + CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY, buildCodexSessionStartContext, runCodexSessionStartHook }; diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index b2da8a8..a149f2e 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -290,6 +290,29 @@ var DeeplakeApi = class { async createIndex(column) { await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName) { + const table = tableName ?? 
this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName) { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) + return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } buildLookupIndexName(table, suffix) { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } @@ -423,7 +446,9 @@ import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSyn import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; var HOME = homedir3(); var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); -var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry. +var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -437,39 +462,49 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. 
Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - + -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. 
LENGTH LIMIT: Keep the total summary under 4000 characters.`; function wikiLog(msg) { diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index 1b596aa..0ec253c 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -84,12 +84,60 @@ function releaseLock(sessionId) { // dist/src/hooks/upload-summary.js import { randomUUID } from "node:crypto"; + +// dist/src/utils/summary-format.js +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +function extractSection(text, heading) { + const re = new RegExp(`^## ${escapeRegex(heading)}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function extractHeaderField(text, field) { + const re = new RegExp(`^- \\*\\*${escapeRegex(field)}\\*\\*:\\s*(.+)$`, "m"); + const match = text.match(re); + return match ? match[1].trim() : null; +} +function compactText(value) { + return value.replace(/\s+/g, " ").trim(); +} +function extractBullets(section, limit = 3) { + if (!section) + return []; + return section.split("\n").map((line) => line.trim()).filter((line) => line.startsWith("- ")).map((line) => compactText(line.slice(2))).filter(Boolean).slice(0, limit); +} +function extractSummaryParticipants(text) { + return extractHeaderField(text, "Participants") ?? extractHeaderField(text, "Speakers"); +} +function extractSummaryTopics(text) { + return extractHeaderField(text, "Topics"); +} +function buildSummaryBlurb(text) { + const participants = extractSummaryParticipants(text); + const topics = extractSummaryTopics(text); + const factBullets = extractBullets(extractSection(text, "Searchable Facts"), 3); + const keyBullets = factBullets.length > 0 ? factBullets : extractBullets(extractSection(text, "Key Facts"), 3); + const whatHappened = compactText(extractSection(text, "What Happened") ?? 
""); + const parts = []; + if (participants) + parts.push(participants); + if (topics) + parts.push(topics); + if (keyBullets.length > 0) + parts.push(keyBullets.join("; ")); + if (parts.length === 0 && whatHappened) + parts.push(whatHappened); + const blurb = parts.join(" | ").slice(0, 300).trim(); + return blurb || "completed"; +} + +// dist/src/hooks/upload-summary.js function esc(s) { return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } function extractDescription(text) { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } async function uploadSummary(query2, params) { const { tableName, vpath, fname, userName, project, agent, text } = params; diff --git a/codex/tests/codex-integration.test.ts b/codex/tests/codex-integration.test.ts index 44b41dd..a74776a 100644 --- a/codex/tests/codex-integration.test.ts +++ b/codex/tests/codex-integration.test.ts @@ -128,6 +128,20 @@ describe("codex integration: session-start", () => { expect(raw).toContain("answer with the smallest exact phrase supported by memory"); expect(raw).toContain("convert the final answer into an absolute month/date/year"); }); + + it("switches to sessions-only recall guidance when the env flag is set", () => { + const raw = runHook("session-start.js", { + session_id: "test-session-004c", + cwd: "/tmp", + hook_event_name: "SessionStart", + model: "gpt-5.2", + }, { + HIVEMIND_SESSIONS_ONLY: "1", + }); + expect(raw).toContain("SESSIONS-ONLY mode"); + expect(raw).toContain("Do NOT start with index.md or summaries"); + expect(raw).not.toContain("index.md (start here)"); + }); }); // ── Capture (UserPromptSubmit) ─────────────────────────────────────────────── diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index 4b1dfed..0f4a261 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -246,6 +246,30 @@ export class DeeplakeApi 
{ await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); } + /** Create the standard BM25 summary index for a memory table. */ + async createSummaryBm25Index(tableName?: string): Promise { + const table = tableName ?? this.tableName; + const indexName = this.buildLookupIndexName(table, "summary_bm25"); + await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("summary")`); + } + + /** Ensure the standard BM25 summary index exists, using a local freshness marker to avoid repeated CREATEs. */ + async ensureSummaryBm25Index(tableName?: string): Promise { + const table = tableName ?? this.tableName; + const suffix = "summary_bm25"; + if (this.hasFreshLookupIndexMarker(table, suffix)) return; + try { + await this.createSummaryBm25Index(table); + this.markLookupIndexReady(table, suffix); + } catch (e: any) { + if (isDuplicateIndexError(e)) { + this.markLookupIndexReady(table, suffix); + return; + } + throw e; + } + } + private buildLookupIndexName(table: string, suffix: string): string { return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_"); } diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index 11cad25..fe09bbd 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -7,6 +7,7 @@ import { readVirtualPathContents, findVirtualPaths, } from "./virtual-table-query.js"; +import { isPsqlMode } from "../utils/retrieval-mode.js"; type VirtualRow = Record; @@ -16,6 +17,7 @@ export type CompiledSegment = | { kind: "ls"; dirs: string[]; longFormat: boolean } | { kind: "find"; dir: string; pattern: string; countOnly: boolean } | { kind: "find_grep"; dir: string; patterns: string[]; params: GrepParams; lineLimit: number } + | { kind: "psql"; query: string; lineLimit: number; tuplesOnly: boolean; fieldSeparator: string } | { kind: "grep"; params: GrepParams; lineLimit: number }; 
interface ParsedModifier { @@ -241,6 +243,175 @@ function parseFindSpec(tokens: string[]): ParsedFindSpec | null { return patterns.length > 0 ? { patterns, execGrepCmd: null } : null; } +function extractPsqlQuery(tokens: string[]): string | null { + let query: string | null = null; + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-c" || token === "--command") { + query = tokens[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith("-c") && token.length > 2) { + query = token.slice(2); + continue; + } + } + return query; +} + +export function extractPsqlQueryFromCommand(cmd: string): string | null { + const tokens = tokenizeShellWords(cmd.trim()); + if (!tokens || tokens[0] !== "psql") return null; + return extractPsqlQuery(tokens); +} + +function normalizeSqlRef(ref: string): string { + return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); +} + +function extractSqlTableRefs(query: string): string[] { + const refs: string[] = []; + const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; + for (const match of query.matchAll(regex)) { + if (match[1]) refs.push(normalizeSqlRef(match[1])); + } + return refs; +} + +export function queryReferencesInterceptedTables(query: string): boolean { + return extractSqlTableRefs(query).some((ref) => + ref === "memory" || + ref === "sessions" || + ref === "sessions_text" || + ref === "hivemind.memory" || + ref === "hivemind.sessions" || + ref === "hivemind.sessions_text"); +} + +export function queryUsesOnlyInterceptedTables(query: string): boolean { + const refs = extractSqlTableRefs(query); + return refs.length > 0 && refs.every((ref) => + ref === "memory" || + ref === "sessions" || + ref === "sessions_text" || + ref === "hivemind.memory" || + ref === "hivemind.sessions" || + ref === "hivemind.sessions_text"); +} + +export function queryUsesBareMemoryTables(query: string): boolean { + return 
extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions"); +} + +function parsePsqlSegment(pipeline: string[], tokens: string[]): CompiledSegment | null { + if (tokens[0] !== "psql" || !isPsqlMode()) return null; + const query = extractPsqlQuery(tokens); + let tuplesOnly = false; + let fieldSeparator = "|"; + + for (let i = 1; i < tokens.length; i++) { + const token = tokens[i]; + if (token === "-F" || token === "--field-separator") { + fieldSeparator = tokens[i + 1] ?? fieldSeparator; + i += 1; + continue; + } + if (token.startsWith("-F") && token.length > 2) { + fieldSeparator = token.slice(2); + continue; + } + if (token === "-t" || token === "--tuples-only") { + tuplesOnly = true; + continue; + } + if (token.startsWith("-") && !token.startsWith("--")) { + const shortFlags = token.slice(1); + if (shortFlags.includes("t")) tuplesOnly = true; + continue; + } + } + + if (!query || !queryUsesOnlyInterceptedTables(query)) return null; + + let lineLimit = 0; + if (pipeline.length > 1) { + if (pipeline.length !== 2) return null; + const headStage = pipeline[1].trim(); + if (!isValidPipelineHeadTailStage(headStage)) return null; + const headTail = parseHeadTailStage(headStage); + if (!headTail || headTail.fromEnd) return null; + lineLimit = headTail.lineLimit; + } + + return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; +} + +function normalizePsqlQuery(query: string, memoryTable: string, sessionsTable: string): string { + let sql = query.trim().replace(/;+\s*$/, ""); + sql = sql + .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`) + .replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`) + .replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`) + .replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`) + 
.replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`) + .replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`) + .replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + if (/\b__hivemind_sessions_text\b/i.test(sql)) { + const cte = `"__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; + sql = /^\s*with\b/i.test(sql) + ? sql.replace(/^\s*with\b/i, `WITH ${cte},`) + : `WITH ${cte} ${sql}`; + } + return sql; +} + +function validatePsqlQuery(query: string, memoryTable: string, sessionsTable: string): string { + if (!queryUsesOnlyInterceptedTables(query)) { + throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + } + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const compact = sql.replace(/\s+/g, " ").trim(); + if (!/^(select|with)\b/i.test(compact)) { + throw new Error("psql mode only supports SELECT queries"); + } + const allowedTables = new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; + if (tableMatches.length === 0) { + throw new Error("psql query must reference memory or sessions"); + } + for (const match of tableMatches) { + if (!allowedTables.has(match[1])) { + throw new Error(`psql query references unsupported table: ${match[1]}`); + } + } + return sql; +} + +function formatPsqlValue(value: unknown): string { + if (value === null || value === undefined) return ""; + if (typeof value === "string") return value; + if (typeof value === 
"number" || typeof value === "boolean") return String(value); + return JSON.stringify(value); +} + +function formatPsqlRows( + rows: VirtualRow[], + tuplesOnly: boolean, + fieldSeparator: string, +): string { + if (rows.length === 0) return tuplesOnly ? "" : "(0 rows)"; + const columns = Object.keys(rows[0] ?? {}); + const body = rows.map((row) => columns.map((column) => formatPsqlValue(row[column])).join(fieldSeparator)); + if (tuplesOnly) return body.join("\n"); + return [columns.join(fieldSeparator), ...body].join("\n"); +} + export function parseCompiledSegment(segment: string): CompiledSegment | null { const { clean, ignoreMissing } = stripAllowedModifiers(segment); if (hasUnsupportedRedirection(clean)) return null; @@ -250,6 +421,9 @@ export function parseCompiledSegment(segment: string): CompiledSegment | null { const tokens = tokenizeShellWords(pipeline[0]); if (!tokens || tokens.length === 0) return null; + const psqlSegment = parsePsqlSegment(pipeline, tokens); + if (psqlSegment) return psqlSegment; + if (tokens[0] === "echo" && pipeline.length === 1) { const text = tokens.slice(1).join(" "); return { kind: "echo", text }; @@ -551,6 +725,15 @@ export async function executeCompiledBashCommand( continue; } + if (segment.kind === "psql") { + const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); + const rows = await api.query(sql); + const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); + const limited = segment.lineLimit > 0 ? 
formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; + outputs.push(limited); + continue; + } + if (segment.kind === "grep") { const result = await handleGrepDirectFn(api, memoryTable, sessionsTable, segment.params); if (result === null) return null; diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 51cc5ee..5dda0f7 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -22,7 +22,12 @@ import { loadConfig } from "../../config.js"; import { DeeplakeApi } from "../../deeplake-api.js"; import { sqlLike } from "../../utils/sql.js"; import { parseBashGrep, handleGrepDirect } from "../grep-direct.js"; -import { executeCompiledBashCommand } from "../bash-command-compiler.js"; +import { + executeCompiledBashCommand, + extractPsqlQueryFromCommand, + queryReferencesInterceptedTables, + queryUsesOnlyInterceptedTables, +} from "../bash-command-compiler.js"; import { findVirtualPaths, readVirtualPathContents, @@ -37,10 +42,26 @@ import { import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; -import { isIndexDisabled, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; +import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; +function isAnyPsqlCommand(cmd: string): boolean { + return /^\s*psql\b/.test(cmd.trim()); +} + +function isHivemindPsqlCommand(cmd: string): boolean { + if (!isPsqlMode()) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} + +function needsHivemindPsqlRewrite(cmd: string): boolean { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && 
!queryUsesOnlyInterceptedTables(query); +} + const log = (msg: string) => _log("codex-pre", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); @@ -67,11 +88,22 @@ export interface CodexPreToolDecision { export function buildUnsupportedGuidance(): string { return "This command is not supported for ~/.deeplake/memory/ operations. " + - "Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. " + + "Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c \"SELECT ...\". " + "Do NOT use python, python3, node, curl, or other interpreters. " + "Rewrite your command using only bash tools and retry."; } +export function buildPsqlOnlyGuidance(): string { + return "Hivemind recall is SQL-only in this mode. " + + "Use psql with the memory and sessions tables only. " + + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} + +export function buildPsqlSchemaGuidance(): string { + return "Only psql SELECT queries over memory and sessions are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; +} + export function runVirtualShell(cmd: string, shellBundle = SHELL_BUNDLE, logFn: (msg: string) => void = log): string { try { return execFileSync("node", [shellBundle, "-c", cmd], { @@ -131,11 +163,30 @@ export async function processCodexPreToolUse( const cmd = input.tool_input?.command ?? 
""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd)) return { action: "pass" }; + if (!touchesMemory(cmd) && !isAnyPsqlCommand(cmd)) return { action: "pass" }; + + if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { + if (needsHivemindPsqlRewrite(cmd)) { + return { + action: "guide", + output: buildPsqlSchemaGuidance(), + rewrittenCommand: cmd.trim(), + }; + } + return { action: "pass" }; + } - const rewritten = rewritePaths(cmd); + if (isPsqlMode() && touchesMemory(cmd)) { + return { + action: "guide", + output: buildPsqlOnlyGuidance(), + rewrittenCommand: cmd.trim(), + }; + } + + const rewritten = isHivemindPsqlCommand(cmd) ? cmd.trim() : rewritePaths(cmd); if (!isSafe(rewritten)) { - const guidance = buildUnsupportedGuidance(); + const guidance = isPsqlMode() ? buildPsqlOnlyGuidance() : buildUnsupportedGuidance(); logFn(`unsupported command, returning guidance: ${rewritten}`); return { action: "guide", @@ -144,6 +195,14 @@ export async function processCodexPreToolUse( }; } + if (isHivemindPsqlCommand(rewritten) && !config) { + return { + action: "guide", + output: "Hivemind SQL mode is unavailable because Deeplake credentials are missing.", + rewrittenCommand: rewritten, + }; + } + if (config) { const table = process.env["HIVEMIND_TABLE"] ?? "memory"; const sessionsTable = process.env["HIVEMIND_SESSIONS_TABLE"] ?? "sessions"; @@ -337,9 +396,24 @@ export async function processCodexPreToolUse( } } catch (e: any) { logFn(`direct query failed, falling back to shell: ${e.message}`); + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten, + }; + } } } + if (isHivemindPsqlCommand(rewritten)) { + return { + action: "guide", + output: "Hivemind SQL mode could not satisfy the query. 
Rewrite it as a narrower SELECT over memory or sessions.", + rewrittenCommand: rewritten, + }; + } + logFn(`intercepted → running via virtual shell: ${rewritten}`); const result = runVirtualShellFn(rewritten, shellBundle, logFn); return { diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index 3873a3b..30c18e1 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -14,6 +14,7 @@ import { loadCredentials } from "../../commands/auth.js"; import { readStdin } from "../../utils/stdin.js"; import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; +import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; import { getInstalledVersion } from "../version-check.js"; const log = (msg: string) => _log("codex-session-start", msg); @@ -38,6 +39,87 @@ Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. Do NOT spawn subagents to read deeplake memory.`; +export const CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +SESSIONS-ONLY mode is active for benchmark comparison. Available Deeplake recall paths are raw session files under sessions/{author}/*. +Do NOT start with index.md or summaries in this mode, and do NOT assume those paths exist. +Open the most likely session file directly before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. 
+TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. 
Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; + +export const CODEX_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. + +Structure in this mode: summaries/*.md → sessions/{author}/* (last resort). /index.md is intentionally unavailable, so do NOT read it or rely on it. +Start by grepping summaries for the named person, topic, or keyword. Then open the specific matching summaries. Only read raw session files if summaries do not contain the exact detail. +If a summary points to a likely source session, read that exact raw session before broader grep variants. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots for Deeplake recall tasks. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. 
+NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." +Search: grep -r "keyword" ~/.deeplake/memory/ +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. +Do NOT spawn subagents to read deeplake memory.`; + +export const CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. + +Available tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions_text(path, creation_date, message_text) +- sessions(path, message, creation_date) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Query memory first to identify likely summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. Re-query memory by exact path for the small set of summary rows you selected. +4. Query sessions_text by exact path for transcript evidence or unresolved dates. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. 
If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. +8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. + +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against session metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions_text row. +- Preserve direct relative-duration answers when they already match the question. 
+- Aggregate across the small candidate set before answering profile or list questions. +- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; + export interface CodexSessionStartInput { session_id: string; transcript_path?: string | null; @@ -53,9 +135,16 @@ export function buildCodexSessionStartContext(args: { authCommand: string; }): string { const versionNotice = args.currentVersion ? `\nHivemind v${args.currentVersion}` : ""; + const template = isPsqlMode() + ? CODEX_SESSION_START_CONTEXT_PSQL + : isSessionsOnlyMode() + ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY + : isIndexDisabled() + ? CODEX_SESSION_START_CONTEXT_NO_INDEX + : CODEX_SESSION_START_CONTEXT; return args.creds?.token - ? `${CODEX_SESSION_START_CONTEXT}\nLogged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` - : `${CODEX_SESSION_START_CONTEXT}\nNot logged in to Deeplake. Run: node "${args.authCommand}" login${versionNotice}`; + ? `${template}\nLogged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` + : `${template}\nNot logged in to Deeplake. 
Run: node "${args.authCommand}" login${versionNotice}`; } interface CodexSessionStartDeps { diff --git a/src/hooks/codex/spawn-wiki-worker.ts b/src/hooks/codex/spawn-wiki-worker.ts index d7c57e1..0db3f15 100644 --- a/src/hooks/codex/spawn-wiki-worker.ts +++ b/src/hooks/codex/spawn-wiki-worker.ts @@ -13,7 +13,9 @@ import type { Config } from "../../config.js"; const HOME = homedir(); export const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); -export const WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge — entities, decisions, relationships, and facts — into a structured, searchable wiki entry. +export const WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -27,39 +29,49 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. 
What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - - -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. + + +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. +- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals. PRIVACY: Never include absolute filesystem paths in the summary. 
LENGTH LIMIT: Keep the total summary under 4000 characters.`; diff --git a/src/hooks/memory-path-utils.ts b/src/hooks/memory-path-utils.ts index 0650b63..2506b10 100644 --- a/src/hooks/memory-path-utils.ts +++ b/src/hooks/memory-path-utils.ts @@ -15,6 +15,7 @@ export const SAFE_BUILTINS = new Set([ "jq", "yq", "xan", "base64", "od", "tar", "gzip", "gunzip", "zcat", "md5sum", "sha1sum", "sha256sum", + "psql", "echo", "printf", "tee", "pwd", "cd", "basename", "dirname", "env", "printenv", "hostname", "whoami", "date", "seq", "expr", "sleep", "timeout", "time", "true", "false", "test", diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 3fbe58a..961a8dd 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -10,7 +10,12 @@ import { sqlLike } from "../utils/sql.js"; import { log as _log } from "../utils/debug.js"; import { isDirectRun } from "../utils/direct-run.js"; import { type GrepParams, parseBashGrep, handleGrepDirect } from "./grep-direct.js"; -import { executeCompiledBashCommand } from "./bash-command-compiler.js"; +import { + executeCompiledBashCommand, + extractPsqlQueryFromCommand, + queryReferencesInterceptedTables, + queryUsesOnlyInterceptedTables, +} from "./bash-command-compiler.js"; import { findVirtualPaths, readVirtualPathContents, @@ -23,10 +28,37 @@ import { writeCachedIndexContent, } from "./query-cache.js"; import { isSafe, touchesMemory, rewritePaths } from "./memory-path-utils.js"; -import { isIndexDisabled, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; +function isAnyPsqlCommand(cmd: string): boolean { + return /^\s*psql\b/.test(cmd.trim()); +} + +function isHivemindPsqlCommand(cmd: string): boolean { + if (!isPsqlMode()) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryUsesOnlyInterceptedTables(query); +} + 
+function needsHivemindPsqlRewrite(cmd: string): boolean { + if (!isPsqlMode() || !isAnyPsqlCommand(cmd)) return false; + const query = extractPsqlQueryFromCommand(cmd); + return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); +} + +function buildPsqlOnlyGuidance(): string { + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + + "Use psql with the memory and sessions tables only. " + + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; +} + +function buildPsqlSchemaGuidance(): string { + return "[RETRY REQUIRED] Only psql SELECT queries over memory and sessions are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; +} + const log = (msg: string) => _log("pre", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); @@ -62,6 +94,7 @@ export function getShellCommand(toolName: string, toolInput: Record _log("query-cache", msg); const DEFAULT_CACHE_ROOT = join(homedir(), ".deeplake", "query-cache"); const INDEX_CACHE_FILE = "index.md"; +const INDEX_CACHE_TTL_MS = 15 * 60 * 1000; interface QueryCacheDeps { cacheRoot?: string; @@ -29,7 +30,13 @@ export function clearSessionQueryCache(sessionId: string, deps: QueryCacheDeps = export function readCachedIndexContent(sessionId: string, deps: QueryCacheDeps = {}): string | null { const { logFn = log } = deps; try { - return readFileSync(join(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8"); + const cachePath = join(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE); + const stats = statSync(cachePath); + if ((Date.now() - stats.mtimeMs) > INDEX_CACHE_TTL_MS) { + clearSessionQueryCache(sessionId, deps); + return null; + } + return readFileSync(cachePath, "utf-8"); } catch (e: any) { if (e?.code === "ENOENT") return null; logFn(`read failed for session=${sessionId}: ${e.message}`); diff --git a/src/hooks/session-start.ts 
b/src/hooks/session-start.ts index e3ec180..f39d263 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -15,6 +15,7 @@ import { loadCredentials, saveCredentials } from "../commands/auth.js"; import { readStdin } from "../utils/stdin.js"; import { log as _log } from "../utils/debug.js"; import { isDirectRun } from "../utils/direct-run.js"; +import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; import { DEFAULT_VERSION_CACHE_TTL_MS, getInstalledVersion, @@ -68,6 +69,133 @@ LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +export const CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) — personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) — shared org memory, currently exposed in SESSIONS-ONLY mode for benchmark comparison + +Deeplake memory structure available in this mode: +- ~/.deeplake/memory/sessions/{author}/* — raw session data + +SEARCH STRATEGY: Search raw session files directly. In this mode, do NOT start with index.md or summaries and do NOT assume those paths exist. +Open the most likely session file directly before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. 
+TEMPORAL FOLLOW-THROUGH: If a session only gives a relative time, use that session's date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely raw session file for the named person. If keyword grep is empty, grep the person's name alone and inspect the candidate session files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, raw sessions may contain the exact phrase even when broad keyword grep looks sparse. Read the candidate transcript and look for positive clues before concluding the answer is absent. +SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." 
+ +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management — each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login — SSO login +- node "HIVEMIND_AUTH_CMD" whoami — show current user/org +- node "HIVEMIND_AUTH_CMD" org list — list organizations +- node "HIVEMIND_AUTH_CMD" org switch — switch organization +- node "HIVEMIND_AUTH_CMD" workspaces — list workspaces +- node "HIVEMIND_AUTH_CMD" workspace — switch workspace +- node "HIVEMIND_AUTH_CMD" invite — invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members — list members +- node "HIVEMIND_AUTH_CMD" remove — remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +export const CLAUDE_SESSION_START_CONTEXT_NO_INDEX = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: + +1. Your built-in memory (~/.claude/) — personal per-project notes +2. Deeplake global memory (~/.deeplake/memory/) — global memory shared across all sessions, users, and agents in the org + +Deeplake memory structure in this mode: +- ~/.deeplake/memory/summaries/username/*.md — AI-generated wiki summaries per session +- ~/.deeplake/memory/sessions/{author}/* — raw session data (last resort) + +INDEX MODE: /index.md is intentionally unavailable for this run. 
Do NOT try to read it or rely on it. +SEARCH STRATEGY: Start by grepping summaries for the named person, topic, or keyword. Then read the specific matching summaries. Only read raw session files if the summaries don't have enough detail. Do NOT jump straight to raw session files. +If a summary points to a likely source session, open that exact raw session before broadening into synonym greps or wide exploratory scans. +Do NOT probe unrelated local paths such as ~/.claude/projects/, arbitrary home directories, or guessed summary roots when the question is about Deeplake memory. +TEMPORAL GROUNDING: If a summary or transcript uses relative time like "last year", "last week", or "next month", resolve it against that session's own date/date_time metadata, not today's date. +TEMPORAL FOLLOW-THROUGH: If a summary only gives a relative time, open the linked source session and use its date/date_time to convert the final answer into an absolute month/date/year or explicit range before responding. +ANSWER SHAPE: Once you have enough evidence, answer with the smallest exact phrase supported by memory. For identity or relationship questions, use just the noun phrase. For education questions, answer with the likely field or credential directly, not the broader life story. For "when" questions, prefer absolute dates/months/years over relative phrases. Avoid extra biography, explanation, or hedging. +NOT-FOUND BAR: Do NOT answer "not found" until you have checked at least one likely summary plus one likely raw session file for the named person when the summary is ambiguous. If keyword grep is empty, grep the person's name alone and inspect the candidate files. +NEGATIVE-EVIDENCE QUESTIONS: For identity, relationship status, and research-topic questions, summaries may omit the exact phrase. If likely summaries are ambiguous, read the candidate raw session transcript and look for positive clues before concluding the answer is absent. 
+SELF-LABEL PRIORITY: For identity questions, prefer the person's own explicit self-label from the transcript over broader category descriptions or paraphrases. +RELATIONSHIP STATUS INFERENCE: For relationship-status questions, treat explicit self-descriptions about partnership, dating, marriage, or parenting plans as status evidence. If the transcript strongly supports an unpartnered status, answer with the concise status phrase instead of "not found." + +Search command: Grep pattern="keyword" path="~/.deeplake/memory" + +Organization management — each argument is SEPARATE (do NOT quote subcommands together): +- node "HIVEMIND_AUTH_CMD" login — SSO login +- node "HIVEMIND_AUTH_CMD" whoami — show current user/org +- node "HIVEMIND_AUTH_CMD" org list — list organizations +- node "HIVEMIND_AUTH_CMD" org switch — switch organization +- node "HIVEMIND_AUTH_CMD" workspaces — list workspaces +- node "HIVEMIND_AUTH_CMD" workspace — switch workspace +- node "HIVEMIND_AUTH_CMD" invite — invite member (ALWAYS ask user which role before inviting) +- node "HIVEMIND_AUTH_CMD" members — list members +- node "HIVEMIND_AUTH_CMD" remove — remove member + +IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. If a task seems to require Python, rewrite it using bash commands and standard text-processing tools (awk, sed, jq, grep, etc.). + +LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. Report what you found rather than exhaustively retrying. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + +export const CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. 
+ +Available Deeplake tables: +- memory(path, summary, project, description, creation_date, last_update_date) +- sessions_text(path, creation_date, message_text) +- sessions(path, message, creation_date) + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with targeted SELECTs against memory to find likely summaries. +2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. +3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. +4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions_text by exact path for those candidate sessions. +5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. +8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. 
+ +Good query patterns: +- Candidate summaries: + psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Exact summary reread: + psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" +- If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: + psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" + +Avoid these mistakes: +- Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. +- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both memory and a likely sessions_text row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. 
+- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. + +IMPORTANT: Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; export function buildSessionStartAdditionalContext(args: { @@ -76,7 +204,14 @@ export function buildSessionStartAdditionalContext(args: { currentVersion: string | null; latestVersion: string | null; }): string { - const resolvedContext = CLAUDE_SESSION_START_CONTEXT.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); + const template = isPsqlMode() + ? CLAUDE_SESSION_START_CONTEXT_PSQL + : isSessionsOnlyMode() + ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY + : isIndexDisabled() + ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX + : CLAUDE_SESSION_START_CONTEXT; + const resolvedContext = template.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); let updateNotice = ""; if (args.currentVersion) { diff --git a/src/hooks/spawn-wiki-worker.ts b/src/hooks/spawn-wiki-worker.ts index a2440b6..2aed495 100644 --- a/src/hooks/spawn-wiki-worker.ts +++ b/src/hooks/spawn-wiki-worker.ts @@ -14,7 +14,9 @@ import { utcTimestamp } from "../utils/debug.js"; const HOME = homedir(); export const WIKI_LOG = join(HOME, ".claude", "hooks", "deeplake-wiki.log"); -export const WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge — entities, decisions, relationships, and facts — into a structured, searchable wiki entry. Think of this as building a knowledge graph, not writing a summary. +export const WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. 
This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. + +The session may be a coding session, a meeting, or a personal conversation. Your job is to turn the raw transcript into a dense, factual wiki page that preserves names, dates, relationships, preferences, plans, titles, and exact status changes. SESSION JSONL path: __JSONL__ SUMMARY FILE to write: __SUMMARY__ @@ -28,40 +30,50 @@ Steps: - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first, then focus on lines AFTER the offset for new content. Merge new facts into the existing summary. - If offset is 0, generate from scratch. + - Treat the JSONL as the source of truth. Do not invent facts. 2. Write the summary file at the path above with this EXACT format. The header fields (Source, Project) are pre-filled — copy them VERBATIM, do NOT replace them with paths from the JSONL content: # Session __SESSION_ID__ - **Source**: __JSONL_SERVER_PATH__ +- **Date**: +- **Participants**: - **Started**: - **Ended**: - **Project**: __PROJECT__ +- **Topics**: - **JSONL offset**: __JSONL_LINES__ ## What Happened -<2-3 dense sentences. What was the goal, what was accomplished, what's left.> +<2-4 dense sentences. What happened, why it mattered, and what changed. Prefer specific names/titles/dates over abstractions.> + +## Searchable Facts + ## People - + ## Entities - + ## Decisions & Reasoning - - -## Key Facts - + ## Files Modified - + ## Open Questions / TODO - - -IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. Future you will search this wiki to answer questions like "who worked on X", "why did we choose Y", "what's the status of Z". If a detail exists in the session, it should be in the wiki. + + +IMPORTANT: +- Be exhaustive. If a detail exists in the session and could answer a later question, it should be in the wiki. 
+- Favor exact nouns and titles over generic paraphrases. Preserve exact book names, organization names, file names, feature names, and self-descriptions. +- Keep facts canonical and query-friendly: "Ava is single", "Leo's home country is Brazil", "The team chose retries because the API returned 429s". +- Resolve relative dates like "last year" or "next month" against the session's own date when the source makes that possible. If it is ambiguous, keep the relative phrase instead of guessing. +- Do not omit beneficiary groups or targets of goals (for example who a project, career, or effort is meant to help). +- Do not leak absolute filesystem paths beyond the pre-filled Source field. PRIVACY: Never include absolute filesystem paths (e.g. /home/user/..., /Users/..., C:\\\\...) in the summary. Use only project-relative paths or the project name. The Source and Project fields above are already correct — do not change them. diff --git a/src/hooks/upload-summary.ts b/src/hooks/upload-summary.ts index f6c96a0..e0d0489 100644 --- a/src/hooks/upload-summary.ts +++ b/src/hooks/upload-summary.ts @@ -9,6 +9,7 @@ */ import { randomUUID } from "node:crypto"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; export type QueryFn = (sql: string) => Promise>>; @@ -39,10 +40,9 @@ export function esc(s: string): string { .replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } -/** Derive the short description from the "## What Happened" section of a wiki summary. */ +/** Derive the short catalog description from the structured summary. */ export function extractDescription(text: string): string { - const match = text.match(/## What Happened\n([\s\S]*?)(?=\n##|$)/); - return match ? 
match[1].trim().slice(0, 300) : "completed"; + return buildSummaryBlurb(text); } /** diff --git a/src/tools/backfill-locomo-memory.ts b/src/tools/backfill-locomo-memory.ts new file mode 100644 index 0000000..0cab6eb --- /dev/null +++ b/src/tools/backfill-locomo-memory.ts @@ -0,0 +1,403 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join, basename } from "node:path"; +import { promisify } from "node:util"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { uploadSummary } from "../hooks/upload-summary.js"; +import { WIKI_PROMPT_TEMPLATE, findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface SessionRow { + path: string; + filename: string; + message: unknown; +} + +interface SessionTask { + sessionId: string; + sourcePath: string; + summaryPath: string; + summaryFilename: string; + jsonlContent: string; + jsonlLines: number; +} + +interface Args { + sessionsTable: string; + memoryTable: string; + concurrency: number; + model: string; + clearMemory: boolean; +} + +const VISIBILITY_RETRIES = 5; +const VISIBILITY_DELAY_MS = 1500; +const REPAIR_ROUNDS = 2; + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + sessionsTable: "sessions", + memoryTable: "memory", + concurrency: 5, + model: "haiku", + clearMemory: true, + }; + + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--sessions-table": + opts.sessionsTable = args[++i]; + break; + case "--memory-table": + opts.memoryTable = args[++i]; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i], 10) || 5); + break; + case "--model": + opts.model = args[++i] || "haiku"; + break; + case "--no-clear-memory": + opts.clearMemory = false; + break; + } + } + + return opts; +} + 
+function parseSessionPayload(raw: unknown): Record { + if (typeof raw === "string") { + try { + return JSON.parse(raw) as Record; + } catch { + return { raw }; + } + } + if (raw && typeof raw === "object") return raw as Record; + return { raw }; +} + +function buildSessionTask(row: SessionRow): SessionTask { + const sessionId = basename(row.path).replace(/\.[^.]+$/, ""); + const summaryFilename = `${sessionId}_summary.md`; + const summaryPath = `/summaries/locomo/${summaryFilename}`; + const payload = parseSessionPayload(row.message); + const turns = Array.isArray(payload["turns"]) ? payload["turns"] as Array> : []; + + if (turns.length === 0) { + return { + sessionId, + sourcePath: row.path, + summaryPath, + summaryFilename, + jsonlContent: `${typeof row.message === "string" ? row.message : JSON.stringify(row.message)}\n`, + jsonlLines: 1, + }; + } + + const speakers = payload["speakers"] && typeof payload["speakers"] === "object" + ? payload["speakers"] as Record + : {}; + const meta = { + type: "session_meta", + session_id: sessionId, + source_path: row.path, + conversation_id: payload["conversation_id"] ?? null, + session_number: payload["session_number"] ?? null, + date_time: payload["date_time"] ?? payload["date"] ?? null, + speaker_a: speakers["speaker_a"] ?? null, + speaker_b: speakers["speaker_b"] ?? null, + }; + + const lines = [JSON.stringify(meta)]; + for (const turn of turns) { + lines.push(JSON.stringify({ + type: "dialogue_turn", + session_id: sessionId, + date_time: payload["date_time"] ?? payload["date"] ?? null, + speaker: turn["speaker"] ?? null, + dia_id: turn["dia_id"] ?? null, + text: turn["text"] ?? 
null, + })); + } + + return { + sessionId, + sourcePath: row.path, + summaryPath, + summaryFilename, + jsonlContent: `${lines.join("\n")}\n`, + jsonlLines: lines.length, + }; +} + +function buildPrompt(task: SessionTask): string { + return WIKI_PROMPT_TEMPLATE + .replace(/__JSONL__/g, "__TMP_JSONL__") + .replace(/__SUMMARY__/g, "__TMP_SUMMARY__") + .replace(/__SESSION_ID__/g, task.sessionId) + .replace(/__PROJECT__/g, "locomo") + .replace(/__PREV_OFFSET__/g, "0") + .replace(/__JSONL_LINES__/g, String(task.jsonlLines)) + .replace(/__JSONL_SERVER_PATH__/g, task.sourcePath); +} + +async function generateSummary(task: SessionTask, claudeBin: string, model: string): Promise { + const tmpRoot = await mkdtemp(join(tmpdir(), `locomo-summary-${task.sessionId}-`)); + const tmpJsonl = join(tmpRoot, "session.jsonl"); + const tmpSummary = join(tmpRoot, "summary.md"); + + try { + await writeFile(tmpJsonl, task.jsonlContent, "utf-8"); + const prompt = buildPrompt(task) + .replace(/__TMP_JSONL__/g, tmpJsonl) + .replace(/__TMP_SUMMARY__/g, tmpSummary); + + await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + DEEPLAKE_WIKI_WORKER: "1", + }, + }); + + return await readFile(tmpSummary, "utf-8"); + } finally { + await rm(tmpRoot, { recursive: true, force: true }); + } +} + +async function generateSummaryWithRetry(task: SessionTask, claudeBin: string, model: string, retries = 2): Promise { + let lastError: unknown; + for (let attempt = 0; attempt <= retries; attempt++) { + try { + return await generateSummary(task, claudeBin, model); + } catch (error) { + lastError = error; + if (attempt === retries) break; + await new Promise((resolve) => setTimeout(resolve, 2000 * (attempt + 1))); + } + } + throw lastError instanceof Error ? 
lastError : new Error(String(lastError)); +} + +async function sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function listExistingSummaryPaths(api: DeeplakeApi, memoryTable: string): Promise> { + const existingRows = await api.query( + `SELECT path FROM "${memoryTable}" WHERE path LIKE '/summaries/locomo/%'` + ); + return new Set( + existingRows + .map((row) => row["path"]) + .filter((value): value is string => typeof value === "string" && value.length > 0), + ); +} + +async function waitForVisibleSummaryPath( + api: DeeplakeApi, + memoryTable: string, + summaryPath: string, + retries = VISIBILITY_RETRIES, +): Promise { + for (let attempt = 0; attempt <= retries; attempt++) { + const rows = await api.query( + `SELECT path FROM "${memoryTable}" WHERE path = '${summaryPath.replace(/\\/g, "\\\\").replace(/'/g, "''")}' LIMIT 1` + ); + if (rows.length > 0) return true; + if (attempt < retries) await sleep(VISIBILITY_DELAY_MS * (attempt + 1)); + } + return false; +} + +async function uploadSummaryWithVerification( + api: DeeplakeApi, + memoryTable: string, + task: SessionTask, + text: string, + retries = 2, +): Promise { + let lastError: unknown; + for (let attempt = 0; attempt <= retries; attempt++) { + try { + await uploadSummary(api.query.bind(api), { + tableName: memoryTable, + vpath: task.summaryPath, + fname: task.summaryFilename, + userName: "locomo", + project: "locomo", + agent: "claude_code", + sessionId: task.sessionId, + text, + }); + const visible = await waitForVisibleSummaryPath(api, memoryTable, task.summaryPath); + if (visible) return; + lastError = new Error("summary row not visible after upload"); + } catch (error) { + lastError = error; + } + if (attempt < retries) await sleep(2000 * (attempt + 1)); + } + throw lastError instanceof Error ? 
lastError : new Error(String(lastError)); +} + +async function withConcurrency(items: T[], concurrency: number, fn: (item: T, idx: number) => Promise) { + let running = 0; + let next = 0; + await new Promise((resolve) => { + function launch() { + while (running < concurrency && next < items.length) { + const idx = next++; + running++; + fn(items[idx], idx) + .finally(() => { + running--; + if (next >= items.length && running === 0) resolve(); + else launch(); + }); + } + } + launch(); + }); +} + +async function main(): Promise { + const opts = parseArgs(); + const creds = loadCredentials(); + if (!creds?.token) { + throw new Error("No Deeplake credentials found. Run hivemind login first."); + } + + const api = new DeeplakeApi( + creds.token, + creds.apiUrl ?? "https://api.deeplake.ai", + creds.orgId, + creds.workspaceId ?? "default", + opts.memoryTable, + ); + + const claudeBin = findClaudeBin(); + const sessionRowsRaw = await api.query( + `SELECT path, filename, message FROM "${opts.sessionsTable}" ` + + `WHERE path LIKE '/sessions/conv_%_session_%.json%' ORDER BY path` + ); + const sessionRows = sessionRowsRaw + .filter((row) => + typeof row["path"] === "string" && + typeof row["filename"] === "string" && + "message" in row, + ) + .map((row) => ({ + path: row["path"] as string, + filename: row["filename"] as string, + message: row["message"], + })) as SessionRow[]; + + const allTasks = sessionRows + .filter((row) => typeof row.path === "string" && row.path.includes("/conv_")) + .map(buildSessionTask); + let tasks = [...allTasks]; + const tasksByPath = new Map(allTasks.map((task) => [task.summaryPath, task])); + const expectedPaths = new Set(allTasks.map((task) => task.summaryPath)); + + console.log(`Workspace: ${creds.workspaceId ?? "default"} | Org: ${creds.orgName ?? 
creds.orgId}`); + console.log(`Sessions table: ${opts.sessionsTable} | Memory table: ${opts.memoryTable}`); + console.log(`Model: ${opts.model} | Concurrency: ${opts.concurrency}`); + console.log(`Found ${tasks.length} LOCOMO sessions`); + + if (opts.clearMemory) { + console.log(`Clearing "${opts.memoryTable}" before backfill...`); + await api.query(`DELETE FROM "${opts.memoryTable}"`); + } else { + const existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + const before = tasks.length; + tasks = tasks.filter((task) => !existingPaths.has(task.summaryPath)); + console.log(`Existing LOCOMO summaries: ${existingPaths.size}. Pending tasks: ${tasks.length}/${before}`); + } + + let completed = 0; + let failed = 0; + const failures: string[] = []; + + await withConcurrency(tasks, opts.concurrency, async (task) => { + try { + const text = await generateSummaryWithRetry(task, claudeBin, opts.model); + if (!text.trim()) throw new Error("empty summary"); + + await uploadSummaryWithVerification(api, opts.memoryTable, task, text); + + completed++; + if (completed % 10 === 0 || completed === tasks.length) { + console.log(` ${completed}/${tasks.length}`); + } + } catch (error) { + failed++; + failures.push(`${task.sessionId}: ${error instanceof Error ? error.message : String(error)}`); + console.error(`FAIL ${task.sessionId}: ${error instanceof Error ? 
error.message : String(error)}`); + } + }); + + let existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + let missingPaths = [...expectedPaths].filter((path) => !existingPaths.has(path)); + + for (let round = 1; round <= REPAIR_ROUNDS && missingPaths.length > 0; round++) { + console.log(`Repair round ${round}: ${missingPaths.length} missing summaries`); + await withConcurrency( + missingPaths + .map((path) => tasksByPath.get(path)) + .filter((task): task is SessionTask => Boolean(task)), + 1, + async (task) => { + try { + const text = await generateSummaryWithRetry(task, claudeBin, opts.model); + if (!text.trim()) throw new Error("empty summary"); + await uploadSummaryWithVerification(api, opts.memoryTable, task, text, 3); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${task.sessionId}: repair round ${round}: ${message}`); + console.error(`FAIL ${task.sessionId} (repair ${round}): ${message}`); + } + }, + ); + existingPaths = await listExistingSummaryPaths(api, opts.memoryTable); + missingPaths = [...expectedPaths].filter((path) => !existingPaths.has(path)); + } + + const finalCount = existingPaths.size; + console.log(`Done. summaries=${completed} failed=${failed} memory_rows=${finalCount}`); + if (missingPaths.length > 0) { + console.error(`Still missing ${missingPaths.length} summaries:`); + for (const path of missingPaths.slice(0, 20)) console.error(` ${path}`); + process.exitCode = 1; + } + if (failures.length > 0) { + console.error(`Failures (${failures.length}):`); + for (const failure of failures.slice(0, 20)) console.error(` ${failure}`); + process.exitCode = 1; + } +} + +main().catch((error) => { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +}); diff --git a/src/tools/smoke-summary-bm25.ts b/src/tools/smoke-summary-bm25.ts new file mode 100644 index 0000000..0711ad1 --- /dev/null +++ b/src/tools/smoke-summary-bm25.ts @@ -0,0 +1,54 @@ +import { loadConfig } from "../config.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { sqlLike, sqlStr } from "../utils/sql.js"; + +async function main(): Promise { + const config = loadConfig(); + if (!config) { + throw new Error("Missing Hivemind/Deeplake config"); + } + + const queryText = process.argv.slice(2).join(" ").trim() || "book novel literature"; + const api = new DeeplakeApi( + config.token, + config.apiUrl, + config.orgId, + config.workspaceId, + config.tableName, + ); + + const started = Date.now(); + await api.ensureSummaryBm25Index(); + const createMs = Date.now() - started; + + const bm25Started = Date.now(); + const bm25Rows = await api.query( + `SELECT path, (summary <#> '${sqlStr(queryText)}') AS score ` + + `FROM "${config.tableName}" WHERE path LIKE '/summaries/%' ` + + `ORDER BY score DESC LIMIT 10`, + ); + const bm25Ms = Date.now() - bm25Started; + + const ilikeStarted = Date.now(); + const ilikeRows = await api.query( + `SELECT path FROM "${config.tableName}" WHERE path LIKE '/summaries/%' ` + + `AND summary ILIKE '%${sqlLike(queryText.split(/\s+/)[0] ?? queryText)}%' LIMIT 10`, + ); + const ilikeMs = Date.now() - ilikeStarted; + + console.log(JSON.stringify({ + table: config.tableName, + queryText, + createIndexMs: createMs, + bm25Ms, + bm25TopPaths: bm25Rows.slice(0, 5).map((row) => ({ path: row["path"], score: row["score"] })), + ilikeMs, + ilikeTopPaths: ilikeRows.slice(0, 5).map((row) => row["path"]), + }, null, 2)); +} + +main().catch((error: unknown) => { + const message = error instanceof Error ? 
error.message : String(error); + console.error(message); + process.exit(1); +}); diff --git a/src/utils/retrieval-mode.ts b/src/utils/retrieval-mode.ts index 3433a07..cc2639b 100644 --- a/src/utils/retrieval-mode.ts +++ b/src/utils/retrieval-mode.ts @@ -12,3 +12,8 @@ export function isSummaryBm25Disabled(): boolean { const raw = process.env["HIVEMIND_DISABLE_SUMMARY_BM25"] ?? process.env["DEEPLAKE_DISABLE_SUMMARY_BM25"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } + +export function isPsqlMode(): boolean { + const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} From 61e207b9791bd82f26df82fdc2687df7c3012583 Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 16:15:52 -0700 Subject: [PATCH 3/7] Store sessions physically per message for SQL recall --- claude-code/bundle/capture.js | 113 ++++++++- claude-code/bundle/commands/auth-login.js | 44 +++- claude-code/bundle/pre-tool-use.js | 61 +++-- claude-code/bundle/session-end.js | 53 +++- claude-code/bundle/session-start-setup.js | 53 +++- claude-code/bundle/session-start.js | 19 +- claude-code/bundle/shell/deeplake-shell.js | 48 +++- claude-code/bundle/wiki-worker.js | 2 +- .../tests/bash-command-compiler.test.ts | 24 +- claude-code/tests/deeplake-api.test.ts | 55 ++-- claude-code/tests/hooks-source.test.ts | 19 +- codex/bundle/capture.js | 60 +++++ codex/bundle/commands/auth-login.js | 44 +++- codex/bundle/pre-tool-use.js | 61 +++-- codex/bundle/session-start-setup.js | 53 +++- codex/bundle/session-start.js | 19 +- codex/bundle/shell/deeplake-shell.js | 48 +++- codex/bundle/stop.js | 113 ++++++++- codex/bundle/wiki-worker.js | 2 +- src/deeplake-api.ts | 56 +++-- src/hooks/bash-command-compiler.ts | 25 +- src/hooks/capture.ts | 1 + src/hooks/codex/capture.ts | 1 + src/hooks/codex/session-start.ts | 19 +- src/hooks/codex/stop.ts | 1 + src/hooks/codex/wiki-worker.ts | 2 +- src/hooks/session-queue.ts | 95 ++++++- 
src/hooks/session-start.ts | 19 +- src/hooks/wiki-worker.ts | 2 +- src/shell/deeplake-fs.ts | 4 +- src/tools/backfill-locomo-memory.ts | 86 ++++++- src/tools/migrate-locomo-sessions.ts | 238 ++++++++++++++++++ 32 files changed, 1252 insertions(+), 188 deletions(-) create mode 100644 src/tools/migrate-locomo-sessions.ts diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index a046850..2eb5206 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -410,17 +410,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 
'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -697,11 +735,20 @@ function buildSessionPath(config, sessionId) { return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; } function buildQueuedSessionRow(args) { + const structured = extractStructuredSessionFields(args.line, args.sessionId); return { id: crypto.randomUUID(), path: args.sessionPath, filename: args.sessionPath.split("/").pop() ?? 
"", message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, author: args.userName, sizeBytes: Buffer.byteLength(args.line, "utf-8"), project: args.projectName, @@ -724,10 +771,10 @@ function buildSessionInsertSql(sessionsTable, rows) { throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); - return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; }).join(", "); - return `INSERT INTO "${table}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; } function 
coerceJsonbPayload(message) { try { @@ -739,6 +786,59 @@ function coerceJsonbPayload(message) { }); } } +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +function extractString(value) { + return typeof value === "string" ? value : value == null ? "" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} async function flushSessionQueue(api, opts) { const queueDir = opts.queueDir ?? 
DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; @@ -1044,6 +1144,7 @@ async function runCaptureHook(input, deps = {}) { appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: input.hook_event_name ?? "", diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index 3547fd0..e36fc73 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -585,17 +585,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author 
TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index f6a9b1a..5f5340d 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -409,17 +409,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT 
NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -1842,11 +1880,11 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); + return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); + return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -1894,23 +1932,19 @@ function parsePsqlSegment(pipeline, tokens) { } function normalizePsqlQuery(query, memoryTable, sessionsTable) { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM 
"${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); - if (/\b__hivemind_sessions_text\b/i.test(sql)) { - const cte = `"__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; - sql = /^\s*with\b/i.test(sql) ? sql.replace(/^\s*with\b/i, `WITH ${cte},`) : `WITH ${cte} ${sql}`; - } + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); return sql; } function validatePsqlQuery(query, memoryTable, sessionsTable) { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); } const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode 
only supports SELECT queries"); } - const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference memory or sessions"); @@ -2224,8 +2258,7 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); - const rows = await api.query(sql); + const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index c1c98c0..e31f36b 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -410,17 +410,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT 
NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -579,10 +617,10 @@ function buildSessionInsertSql(sessionsTable, rows) { throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); - return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; }).join(", "); - return `INSERT INTO "${table}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, 
last_update_date) VALUES ${values}`; } function coerceJsonbPayload(message) { try { @@ -594,6 +632,9 @@ function coerceJsonbPayload(message) { }); } } +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} async function flushSessionQueue(api, opts) { const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index 73357f5..c1b7371 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -422,17 +422,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL 
DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -488,10 +526,10 @@ function buildSessionInsertSql(sessionsTable, rows) { throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); - return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + const jsonForSql = 
escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; }).join(", "); - return `INSERT INTO "${table}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; } function coerceJsonbPayload(message) { try { @@ -503,6 +541,9 @@ function coerceJsonbPayload(message) { }); } } +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} async function flushSessionQueue(api, opts) { const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? 
DEFAULT_MAX_BATCH_ROWS; diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index 54a1b42..938a8b2 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -265,8 +265,7 @@ var CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, Available Deeplake tables: - memory(path, summary, project, description, creation_date, last_update_date) -- sessions_text(path, creation_date, message_text) -- sessions(path, message, creation_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) Use this command shape: - psql -At -F '|' -c "SELECT ..." @@ -275,11 +274,13 @@ SQL strategy: 1. Start with targeted SELECTs against memory to find likely summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. 3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. -4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions_text by exact path for those candidate sessions. +4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. 5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. +8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +9. 
If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. Good query patterns: - Candidate summaries: @@ -287,22 +288,24 @@ Good query patterns: - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" - Transcript search inside known sessions: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. -- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. 
+- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. Answer rules: - Return the smallest exact answer supported by the data. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. -- Do not answer "not found" until you have checked both memory and a likely sessions_text row for the named person. +- Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. - For list or profile questions, aggregate across the small set of candidate sessions before answering. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 84cf810..14f7835 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -67107,17 +67107,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT 
NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -68175,7 +68213,7 @@ var DeeplakeFs = class _DeeplakeFs { return buf2; } if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -68213,7 +68251,7 @@ var DeeplakeFs = class _DeeplakeFs { if (pend) return pend.contentText; if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index f280a41..19c1150 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -219,7 +219,7 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, 
creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index 78d9502..e1cfd4f 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -309,9 +309,9 @@ describe("bash-command-compiler parsing", () => { fieldSeparator: "|", }); - expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 2\"")).toEqual({ + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2\"")).toEqual({ kind: "psql", - query: "SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 2", + query: "SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2", lineLimit: 0, tuplesOnly: true, fieldSeparator: "|", @@ -530,13 +530,18 @@ describe("bash-command-compiler execution", () => { restorePsqlMode(); }); - it("rewrites sessions_text queries into a text CTE over the backing sessions table", async () => { + it("executes direct sessions queries against physical per-message rows", async () => { const query = vi.fn(async (sql: string) => { - expect(sql).toContain('WITH "__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "sessions_actual")'); - expect(sql).toContain('FROM "__hivemind_sessions_text"'); - expect(sql).toContain("message_text ILIKE '%camp%'"); + 
expect(sql).toContain('FROM "sessions_actual"'); + expect(sql).toContain("WHERE path = '/sessions/conv_0_session_8.json'"); return [ - { path: "/sessions/conv_0_session_8.json", creation_date: "2023-08-10", message_text: "{\"turns\":[{\"text\":\"We planned a camping trip\"}]}" }, + { + path: "/sessions/conv_0_session_8.json", + creation_date: "2023-08-10", + turn_index: 1, + speaker: "Melanie", + text: "We planned a camping trip", + }, ]; }); @@ -545,9 +550,10 @@ describe("bash-command-compiler execution", () => { { query } as any, "memory_actual", "sessions_actual", - "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 1\"", + "psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE path = '/sessions/conv_0_session_8.json' AND text ILIKE '%camp%' ORDER BY turn_index ASC LIMIT 1\"", ); - expect(output).toBe('/sessions/conv_0_session_8.json|2023-08-10|{"turns":[{"text":"We planned a camping trip"}]}'); + expect(output).toBe("/sessions/conv_0_session_8.json|2023-08-10|1|Melanie|We planned a camping trip"); + expect(query).toHaveBeenCalledTimes(1); restorePsqlMode(); }); diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index 306c2df..046e347 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -457,21 +457,25 @@ describe("DeeplakeApi.ensureTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "memory" }] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi("memory"); await api.ensureTable(); await api.ensureSessionsTable("sessions"); - expect(mockFetch).toHaveBeenCalledTimes(3); - const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const tableListCalls = mockFetch.mock.calls.filter(([url]) => 
String(url).endsWith("/tables")); + expect(tableListCalls).toHaveLength(1); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; expect(createSql).toContain("CREATE TABLE IF NOT EXISTS"); expect(createSql).toContain("sessions"); - const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? ""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); expect(indexSql).toContain("\"path\""); expect(indexSql).toContain("\"creation_date\""); + expect(indexSql).toContain("\"turn_index\""); }); }); @@ -483,19 +487,26 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); - const createSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; expect(createSql).toContain("CREATE TABLE IF NOT EXISTS"); expect(createSql).toContain("sessions"); expect(createSql).toContain("JSONB"); expect(createSql).toContain("USING deeplake"); - const indexSql = JSON.parse(mockFetch.mock.calls[2][1].body).query; + expect(createSql).toContain("session_id TEXT"); + expect(createSql).toContain("turn_index BIGINT"); + expect(createSql).toContain("text TEXT"); + const alterSqls = querySqls.filter((sql) => sql.startsWith("ALTER TABLE")); + expect(alterSqls).toHaveLength(8); + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? ""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); expect(indexSql).toContain("\"sessions\""); - expect(indexSql).toContain("(\"path\", \"creation_date\")"); + expect(indexSql).toContain("(\"path\", \"creation_date\", \"turn_index\")"); }); it("ensures the lookup index when sessions table already exists", async () => { @@ -503,11 +514,14 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); - mockFetch.mockResolvedValueOnce(jsonResponse({})); + mockFetch.mockResolvedValue(jsonResponse({})); const api = makeApi(); await api.ensureSessionsTable("sessions"); - expect(mockFetch).toHaveBeenCalledTimes(2); - const indexSql = JSON.parse(mockFetch.mock.calls[1][1].body).query; + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + const indexSql = querySqls.find((sql) => sql.includes("CREATE INDEX IF NOT EXISTS")) ?? 
""; expect(indexSql).toContain("CREATE INDEX IF NOT EXISTS"); }); @@ -516,11 +530,16 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); + for (let i = 0; i < 8; i++) mockFetch.mockResolvedValueOnce(jsonResponse({})); mockFetch.mockResolvedValueOnce(jsonResponse("forbidden", 403)); const api = makeApi(); await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); - expect(mockFetch).toHaveBeenCalledTimes(2); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + expect(querySqls.some((sql) => sql.includes("CREATE INDEX IF NOT EXISTS"))).toBe(true); }); it("treats duplicate concurrent index creation errors as success and records a local marker", async () => { @@ -528,14 +547,20 @@ describe("DeeplakeApi.ensureSessionsTable", () => { ok: true, status: 200, json: async () => ({ tables: [{ table_name: "sessions" }] }), }); + for (let i = 0; i < 8; i++) mockFetch.mockResolvedValueOnce(jsonResponse({})); mockFetch.mockResolvedValueOnce(jsonResponse("duplicate key value violates unique constraint \"pg_class_relname_nsp_index\"", 400)); const api = makeApi(); await expect(api.ensureSessionsTable("sessions")).resolves.toBeUndefined(); mockFetch.mockReset(); + mockFetch.mockResolvedValue(jsonResponse({})); await api.ensureSessionsTable("sessions"); - expect(mockFetch).not.toHaveBeenCalled(); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? 
JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + expect(querySqls.filter((sql) => sql.startsWith("ALTER TABLE"))).toHaveLength(8); + expect(querySqls.some((sql) => sql.includes("CREATE INDEX IF NOT EXISTS"))).toBe(false); }); }); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index a2b8ff9..6345224 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -293,8 +293,8 @@ describe("claude pre-tool source", () => { command: "psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 1\"", })).toBe("psql -At -F '|' -c \"SELECT path, summary FROM hivemind.memory LIMIT 1\""); expect(getShellCommand("Bash", { - command: "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text LIMIT 1\"", - })).toBe("psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text LIMIT 1\""); + command: "psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions LIMIT 1\"", + })).toBe("psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions LIMIT 1\""); expect(getShellCommand("Read", { file_path: "~/.deeplake/memory/index.md" })).toBeNull(); expect(getShellCommand("Glob", { path: "~/.deeplake/memory/summaries" })).toBeNull(); } finally { @@ -332,18 +332,18 @@ describe("claude pre-tool source", () => { }); expect(passthrough).toBeNull(); - const sessionsText = await processPreToolUse({ + const sessionsQuery = await processPreToolUse({ session_id: "s1", tool_name: "Bash", tool_input: { - command: "psql -At -F '|' -c \"SELECT path, creation_date, message_text FROM sessions_text WHERE message_text ILIKE '%camp%' LIMIT 1\"", + command: "psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 1\"", }, tool_use_id: "tu-psql-sessions-text", }, { config: baseConfig, - 
executeCompiledBashCommandFn: vi.fn(async () => "/sessions/conv_0_session_8.json|2023-08-10|{\"turns\":[{\"text\":\"We planned a camping trip\"}]}") as any, + executeCompiledBashCommandFn: vi.fn(async () => "/sessions/conv_0_session_8.json|2023-08-10|1|Melanie|We planned a camping trip") as any, }); - expect(sessionsText?.command).toContain("camping trip"); + expect(sessionsQuery?.command).toContain("camping trip"); } finally { if (prev === undefined) delete process.env.HIVEMIND_PSQL_MODE; else process.env.HIVEMIND_PSQL_MODE = prev; @@ -755,11 +755,10 @@ describe("claude session start source", () => { }); expect(context).toContain("DEEPLAKE MEMORY SQL MODE"); expect(context).toContain("memory(path, summary"); - expect(context).toContain("sessions_text(path, creation_date, message_text)"); - expect(context).toContain("sessions(path, message"); + expect(context).toContain("sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message)"); expect(context).toContain("psql -At -F '|'"); - expect(context).toContain("Use sessions only when you need the raw structured payload"); - expect(context).toContain("Do NOT filter sessions.message directly"); + expect(context).toContain("Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time"); + expect(context).toContain("Use sessions.message only when you need the raw JSON payload"); expect(context).toContain("Do not use filesystem commands"); expect(context).not.toContain("Always read index.md first"); expect(context).not.toContain("~/.deeplake/memory"); diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 261697c..30fce82 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -327,11 +327,20 @@ function buildSessionPath(config, sessionId) { return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; } function buildQueuedSessionRow(args) { + const 
structured = extractStructuredSessionFields(args.line, args.sessionId); return { id: crypto.randomUUID(), path: args.sessionPath, filename: args.sessionPath.split("/").pop() ?? "", message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, author: args.userName, sizeBytes: Buffer.byteLength(args.line, "utf-8"), project: args.projectName, @@ -349,6 +358,56 @@ function appendQueuedSessionRow(row, queueDir = DEFAULT_QUEUE_DIR) { `); return queuePath; } +function extractString(value) { + return typeof value === "string" ? value : value == null ? "" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? 
toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} function getQueuePath(queueDir, sessionId) { return join5(queueDir, `${sessionId}.jsonl`); } @@ -463,6 +522,7 @@ async function runCodexCaptureHook(input, deps = {}) { appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: input.hook_event_name ?? "", diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index 3547fd0..e36fc73 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -585,17 +585,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT 
NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 48ba30b..245a213 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -410,17 +410,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', 
filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -1829,11 +1867,11 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); + return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => ref === 
"memory" || ref === "sessions" || ref === "sessions_text" || ref === "hivemind.memory" || ref === "hivemind.sessions" || ref === "hivemind.sessions_text"); + return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -1881,23 +1919,19 @@ function parsePsqlSegment(pipeline, tokens) { } function normalizePsqlQuery(query, memoryTable, sessionsTable) { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); - if (/\b__hivemind_sessions_text\b/i.test(sql)) { - const cte = `"__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; - sql = /^\s*with\b/i.test(sql) ? 
sql.replace(/^\s*with\b/i, `WITH ${cte},`) : `WITH ${cte} ${sql}`; - } + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); return sql; } function validatePsqlQuery(query, memoryTable, sessionsTable) { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); } const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode only supports SELECT queries"); } - const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference memory or sessions"); @@ -2211,8 +2245,7 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); - const rows = await api.query(sql); + const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); const formatted = 
formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index f6d17c7..8cb1e70 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -419,17 +419,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description 
TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -485,10 +523,10 @@ function buildSessionInsertSql(sessionsTable, rows) { throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); - return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', 
'${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; }).join(", "); - return `INSERT INTO "${table}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; } function coerceJsonbPayload(message) { try { @@ -500,6 +538,9 @@ function coerceJsonbPayload(message) { }); } } +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} async function flushSessionQueue(api, opts) { const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index 9ab2ac7..59c5cf4 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -167,8 +167,7 @@ var CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only f Available tables: - memory(path, summary, project, description, creation_date, last_update_date) -- sessions_text(path, creation_date, message_text) -- sessions(path, message, creation_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) Use this command shape: - psql -At -F '|' -c "SELECT ..." @@ -177,11 +176,13 @@ Workflow: 1. Query memory first to identify likely summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. 3. 
Re-query memory by exact path for the small set of summary rows you selected. -4. Query sessions_text by exact path for transcript evidence or unresolved dates. +4. Query sessions by exact path for transcript evidence or unresolved dates. 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. +8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +9. If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. 
Good query patterns: - Candidate summaries: @@ -189,22 +190,24 @@ Good query patterns: - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" - Transcript search inside known sessions: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. -- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. Answer rules: - Return the smallest exact answer supported by the data. - Resolve relative dates against session metadata, not today's date. 
-- Do not answer "not found" until you have checked both memory and a likely sessions_text row. +- Do not answer "not found" until you have checked both memory and a likely sessions row. - Preserve direct relative-duration answers when they already match the question. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. - Aggregate across the small candidate set before answering profile or list questions. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 84cf810..14f7835 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -67107,17 +67107,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT 
NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -68175,7 +68213,7 @@ var DeeplakeFs = class _DeeplakeFs { return buf2; } if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text = joinSessionMessages(p22, rows2.map((row) => row["message"])); @@ -68213,7 +68251,7 @@ var DeeplakeFs = class _DeeplakeFs { if (pend) return pend.contentText; if (this.sessionPaths.has(p22) && this.sessionsTable) { - const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC`); + const rows2 = await this.client.query(`SELECT message FROM "${this.sessionsTable}" WHERE path = '${sqlStr(p22)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows2.length === 0) throw fsErr("ENOENT", "no such file or directory", p22); const text2 = joinSessionMessages(p22, rows2.map((row) => row["message"])); diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index a149f2e..e0d68ea 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -410,17 +410,55 @@ var DeeplakeApi = class { this._tablesCache = [...tables, tbl]; } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ + /** Create the sessions table (one physical row per message/event, with direct search columns). */ async ensureSessionsTable(name) { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (` + sessionColumns.join(", ") + `) USING deeplake`); log2(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + 
["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", `TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`] + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } }; @@ -575,11 +613,20 @@ function buildSessionPath(config, sessionId) { return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; } function buildQueuedSessionRow(args) { + const structured = extractStructuredSessionFields(args.line, args.sessionId); return { id: crypto.randomUUID(), path: args.sessionPath, filename: args.sessionPath.split("/").pop() ?? "", message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, author: args.userName, sizeBytes: Buffer.byteLength(args.line, "utf-8"), project: args.projectName, @@ -602,10 +649,10 @@ function buildSessionInsertSql(sessionsTable, rows) { throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); - return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; + const jsonForSql = 
escapeJsonbLiteral(coerceJsonbPayload(row.message)); + return `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, '${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', '${sqlStr(row.speaker)}', '${sqlStr(row.text)}', '${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', '${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', '${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')`; }).join(", "); - return `INSERT INTO "${table}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; + return `INSERT INTO "${table}" (id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values}`; } function coerceJsonbPayload(message) { try { @@ -617,6 +664,59 @@ function coerceJsonbPayload(message) { }); } } +function escapeJsonbLiteral(value) { + return value.replace(/'/g, "''").replace(/\0/g, ""); +} +function extractString(value) { + return typeof value === "string" ? value : value == null ? 
"" : String(value); +} +function extractNumber(value) { + if (typeof value === "number" && Number.isFinite(value)) + return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return parsed; + } + return 0; +} +function extractStructuredSessionFields(message, fallbackSessionId = "") { + let parsed = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") + parsed = raw; + } catch { + parsed = null; + } + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "" + }; + } + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) || content || (eventType === "tool_call" ? toolName : ""); + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]) + }; +} async function flushSessionQueue(api, opts) { const queueDir = opts.queueDir ?? DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? 
DEFAULT_MAX_BATCH_ROWS; @@ -869,6 +969,7 @@ async function runCodexStopHook(input, deps = {}) { appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: "Stop", diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index 0ec253c..3b50821 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -209,7 +209,7 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index 0f4a261..e3a72ec 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -396,30 +396,58 @@ export class DeeplakeApi { // } catch { /* index may already exist or not be supported */ } } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + /** Create the sessions table (one physical row per message/event, with direct search columns). 
*/ async ensureSessionsTable(name: string): Promise { + const sessionColumns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `message JSONB`, + `session_id TEXT NOT NULL DEFAULT ''`, + `event_type TEXT NOT NULL DEFAULT ''`, + `turn_index BIGINT NOT NULL DEFAULT 0`, + `dia_id TEXT NOT NULL DEFAULT ''`, + `speaker TEXT NOT NULL DEFAULT ''`, + `text TEXT NOT NULL DEFAULT ''`, + `turn_summary TEXT NOT NULL DEFAULT ''`, + `source_date_time TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; const tables = await this.listTables(); if (!tables.includes(name)) { log(`table "${name}" not found, creating`); await this.query( `CREATE TABLE IF NOT EXISTS "${name}" (` + - `id TEXT NOT NULL DEFAULT '', ` + - `path TEXT NOT NULL DEFAULT '', ` + - `filename TEXT NOT NULL DEFAULT '', ` + - `message JSONB, ` + - `author TEXT NOT NULL DEFAULT '', ` + - `mime_type TEXT NOT NULL DEFAULT 'application/json', ` + - `size_bytes BIGINT NOT NULL DEFAULT 0, ` + - `project TEXT NOT NULL DEFAULT '', ` + - `description TEXT NOT NULL DEFAULT '', ` + - `agent TEXT NOT NULL DEFAULT '', ` + - `creation_date TEXT NOT NULL DEFAULT '', ` + - `last_update_date TEXT NOT NULL DEFAULT ''` + + sessionColumns.join(", ") + `) USING deeplake`, ); log(`table "${name}" created`); if (!tables.includes(name)) this._tablesCache = [...tables, name]; } - await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`); + const alterColumns: Array<[string, string]> = [ + ["session_id", `TEXT NOT NULL DEFAULT ''`], + ["event_type", `TEXT NOT NULL DEFAULT ''`], + ["turn_index", `BIGINT NOT NULL DEFAULT 0`], + ["dia_id", 
`TEXT NOT NULL DEFAULT ''`], + ["speaker", `TEXT NOT NULL DEFAULT ''`], + ["text", `TEXT NOT NULL DEFAULT ''`], + ["turn_summary", `TEXT NOT NULL DEFAULT ''`], + ["source_date_time", `TEXT NOT NULL DEFAULT ''`], + ]; + for (const [column, ddl] of alterColumns) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Some backends may not support ADD COLUMN IF NOT EXISTS; keep going so older tables still work. + } + } + await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } } diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index fe09bbd..afa51c1 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -283,10 +283,8 @@ export function queryReferencesInterceptedTables(query: string): boolean { return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || - ref === "sessions_text" || ref === "hivemind.memory" || - ref === "hivemind.sessions" || - ref === "hivemind.sessions_text"); + ref === "hivemind.sessions"); } export function queryUsesOnlyInterceptedTables(query: string): boolean { @@ -294,10 +292,8 @@ export function queryUsesOnlyInterceptedTables(query: string): boolean { return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || - ref === "sessions_text" || ref === "hivemind.memory" || - ref === "hivemind.sessions" || - ref === "hivemind.sessions_text"); + ref === "hivemind.sessions"); } export function queryUsesBareMemoryTables(query: string): boolean { @@ -352,35 +348,25 @@ function normalizePsqlQuery(query: string, memoryTable: string, sessionsTable: s sql = sql .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) - .replace(/\bFROM\s+"?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`) - .replace(/\bJOIN\s+"?sessions_text"?\b/gi, `JOIN 
"__hivemind_sessions_text"`) .replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`) .replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) - .replace(/\bFROM\s+"?hivemind"?\."?sessions_text"?\b/gi, `FROM "__hivemind_sessions_text"`) - .replace(/\bJOIN\s+"?hivemind"?\."?sessions_text"?\b/gi, `JOIN "__hivemind_sessions_text"`) .replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); - if (/\b__hivemind_sessions_text\b/i.test(sql)) { - const cte = `"__hivemind_sessions_text" AS (SELECT path, creation_date, message::text AS message_text FROM "${sessionsTable}")`; - sql = /^\s*with\b/i.test(sql) - ? sql.replace(/^\s*with\b/i, `WITH ${cte},`) - : `WITH ${cte} ${sql}`; - } return sql; } function validatePsqlQuery(query: string, memoryTable: string, sessionsTable: string): string { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, sessions_text, hivemind.memory, hivemind.sessions, or hivemind.sessions_text"); + throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); } const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode only supports SELECT queries"); } - const allowedTables = new Set([memoryTable, sessionsTable, "__hivemind_sessions_text"]); + const allowedTables = new Set([memoryTable, sessionsTable]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference memory or sessions"); @@ -726,8 +712,7 @@ export async function 
executeCompiledBashCommand( } if (segment.kind === "psql") { - const sql = validatePsqlQuery(segment.query, memoryTable, sessionsTable); - const rows = await api.query(sql); + const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); diff --git a/src/hooks/capture.ts b/src/hooks/capture.ts index ae90ad8..adb8e07 100644 --- a/src/hooks/capture.ts +++ b/src/hooks/capture.ts @@ -209,6 +209,7 @@ export async function runCaptureHook(input: HookInput, deps: CaptureHookDeps = { appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: input.hook_event_name ?? "", diff --git a/src/hooks/codex/capture.ts b/src/hooks/codex/capture.ts index 615b72d..5908b4a 100644 --- a/src/hooks/codex/capture.ts +++ b/src/hooks/codex/capture.ts @@ -179,6 +179,7 @@ export async function runCodexCaptureHook(input: CodexHookInput, deps: CodexCapt appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: input.hook_event_name ?? 
"", diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index 30c18e1..1cd875f 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -77,8 +77,7 @@ export const CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use S Available tables: - memory(path, summary, project, description, creation_date, last_update_date) -- sessions_text(path, creation_date, message_text) -- sessions(path, message, creation_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) Use this command shape: - psql -At -F '|' -c "SELECT ..." @@ -87,11 +86,13 @@ Workflow: 1. Query memory first to identify likely summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. 3. Re-query memory by exact path for the small set of summary rows you selected. -4. Query sessions_text by exact path for transcript evidence or unresolved dates. +4. Query sessions by exact path for transcript evidence or unresolved dates. 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. +8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +9. If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +10. 
For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. Good query patterns: - Candidate summaries: @@ -99,22 +100,24 @@ Good query patterns: - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" - Transcript search inside known sessions: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. -- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. 
Answer rules: - Return the smallest exact answer supported by the data. - Resolve relative dates against session metadata, not today's date. -- Do not answer "not found" until you have checked both memory and a likely sessions_text row. +- Do not answer "not found" until you have checked both memory and a likely sessions row. - Preserve direct relative-duration answers when they already match the question. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. - Aggregate across the small candidate set before answering profile or list questions. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. diff --git a/src/hooks/codex/stop.ts b/src/hooks/codex/stop.ts index 9118f97..9393c34 100644 --- a/src/hooks/codex/stop.ts +++ b/src/hooks/codex/stop.ts @@ -148,6 +148,7 @@ export async function runCodexStopHook(input: CodexStopInput, deps: CodexStopDep appendQueuedSessionRowFn(buildQueuedSessionRowFn({ sessionPath, line, + sessionId: input.session_id, userName: config.userName, projectName, description: "Stop", diff --git a/src/hooks/codex/wiki-worker.ts b/src/hooks/codex/wiki-worker.ts index a7c50f8..cf93218 100644 --- a/src/hooks/codex/wiki-worker.ts +++ b/src/hooks/codex/wiki-worker.ts @@ -97,7 +97,7 @@ async function main(): Promise { wlog("fetching session events"); const rows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + - `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` + `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) { diff --git a/src/hooks/session-queue.ts b/src/hooks/session-queue.ts index 
1157a44..6d17c7a 100644 --- a/src/hooks/session-queue.ts +++ b/src/hooks/session-queue.ts @@ -25,6 +25,14 @@ export interface QueuedSessionRow { path: string; filename: string; message: string; + sessionId: string; + eventType: string; + turnIndex: number; + diaId: string; + speaker: string; + text: string; + turnSummary: string; + sourceDateTime: string; author: string; sizeBytes: number; project: string; @@ -92,17 +100,27 @@ export function buildSessionPath(config: { userName: string; orgName: string; wo export function buildQueuedSessionRow(args: { sessionPath: string; line: string; + sessionId?: string; userName: string; projectName: string; description: string; agent: string; timestamp: string; }): QueuedSessionRow { + const structured = extractStructuredSessionFields(args.line, args.sessionId); return { id: crypto.randomUUID(), path: args.sessionPath, filename: args.sessionPath.split("/").pop() ?? "", message: args.line, + sessionId: structured.sessionId, + eventType: structured.eventType, + turnIndex: structured.turnIndex, + diaId: structured.diaId, + speaker: structured.speaker, + text: structured.text, + turnSummary: structured.turnSummary, + sourceDateTime: structured.sourceDateTime, author: args.userName, sizeBytes: Buffer.byteLength(args.line, "utf-8"), project: args.projectName, @@ -125,9 +143,11 @@ export function buildSessionInsertSql(sessionsTable: string, rows: QueuedSession if (rows.length === 0) throw new Error("buildSessionInsertSql: rows must not be empty"); const table = sqlIdent(sessionsTable); const values = rows.map((row) => { - const jsonForSql = sqlStr(coerceJsonbPayload(row.message)); + const jsonForSql = escapeJsonbLiteral(coerceJsonbPayload(row.message)); return ( `('${sqlStr(row.id)}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', '${jsonForSql}'::jsonb, ` + + `'${sqlStr(row.sessionId)}', '${sqlStr(row.eventType)}', ${row.turnIndex}, '${sqlStr(row.diaId)}', ` + + `'${sqlStr(row.speaker)}', '${sqlStr(row.text)}', 
'${sqlStr(row.turnSummary)}', '${sqlStr(row.sourceDateTime)}', ` + `'${sqlStr(row.author)}', ${row.sizeBytes}, '${sqlStr(row.project)}', '${sqlStr(row.description)}', ` + `'${sqlStr(row.agent)}', '${sqlStr(row.creationDate)}', '${sqlStr(row.lastUpdateDate)}')` ); @@ -135,7 +155,7 @@ export function buildSessionInsertSql(sessionsTable: string, rows: QueuedSession return ( `INSERT INTO "${table}" ` + - `(id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `(id, path, filename, message, session_id, event_type, turn_index, dia_id, speaker, text, turn_summary, source_date_time, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + `VALUES ${values}` ); } @@ -151,6 +171,77 @@ function coerceJsonbPayload(message: string): string { } } +function escapeJsonbLiteral(value: string): string { + return value + .replace(/'/g, "''") + .replace(/\0/g, ""); +} + +function extractString(value: unknown): string { + return typeof value === "string" ? value : value == null ? 
"" : String(value); +} + +function extractNumber(value: unknown): number { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return 0; +} + +function extractStructuredSessionFields(message: string, fallbackSessionId = ""): { + sessionId: string; + eventType: string; + turnIndex: number; + diaId: string; + speaker: string; + text: string; + turnSummary: string; + sourceDateTime: string; +} { + let parsed: Record | null = null; + try { + const raw = JSON.parse(message); + if (raw && typeof raw === "object") parsed = raw as Record; + } catch { + parsed = null; + } + + if (!parsed) { + return { + sessionId: fallbackSessionId, + eventType: "raw_message", + turnIndex: 0, + diaId: "", + speaker: "", + text: message, + turnSummary: "", + sourceDateTime: "", + }; + } + + const eventType = extractString(parsed["type"]); + const content = extractString(parsed["content"]); + const toolName = extractString(parsed["tool_name"]); + const speaker = extractString(parsed["speaker"]) + || (eventType === "user_message" ? "user" : eventType === "assistant_message" ? "assistant" : ""); + const text = extractString(parsed["text"]) + || content + || (eventType === "tool_call" ? toolName : ""); + + return { + sessionId: extractString(parsed["session_id"]) || fallbackSessionId, + eventType, + turnIndex: extractNumber(parsed["turn_index"]), + diaId: extractString(parsed["dia_id"]), + speaker, + text, + turnSummary: extractString(parsed["summary"]) || extractString(parsed["message_summary"]) || extractString(parsed["msg_summary"]), + sourceDateTime: extractString(parsed["source_date_time"]) || extractString(parsed["date_time"]) || extractString(parsed["date"]), + }; +} + export async function flushSessionQueue(api: SessionQueueApi, opts: FlushSessionQueueOptions): Promise { const queueDir = opts.queueDir ?? 
DEFAULT_QUEUE_DIR; const maxBatchRows = opts.maxBatchRows ?? DEFAULT_MAX_BATCH_ROWS; diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index f39d263..3b723e1 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -151,8 +151,7 @@ export const CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For Available Deeplake tables: - memory(path, summary, project, description, creation_date, last_update_date) -- sessions_text(path, creation_date, message_text) -- sessions(path, message, creation_date) +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) Use this command shape: - psql -At -F '|' -c "SELECT ..." @@ -161,11 +160,13 @@ SQL strategy: 1. Start with targeted SELECTs against memory to find likely summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. 3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. -4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions_text by exact path for those candidate sessions. +4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. 5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions only when you need the raw structured payload; use sessions_text for normal text filtering. +8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +9. 
If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. Good query patterns: - Candidate summaries: @@ -173,22 +174,24 @@ Good query patterns: - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" - Transcript search inside known sessions: - psql -At -F '|' -c "SELECT path, creation_date, message_text FROM sessions_text WHERE path IN ('/sessions/...', '/sessions/...') AND message_text ILIKE '%%' ORDER BY creation_date ASC" + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. -- Do NOT filter sessions.message directly. Use sessions_text.message_text for transcript text search. 
+- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. Answer rules: - Return the smallest exact answer supported by the data. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. -- Do not answer "not found" until you have checked both memory and a likely sessions_text row for the named person. +- Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If a summary says something vague like "home country", search sessions for the exact named place before answering. - For list or profile questions, aggregate across the small set of candidate sessions before answering. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. 
diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 6e12445..ddc8ec2 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -100,7 +100,7 @@ async function main(): Promise { wlog("fetching session events"); const rows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + - `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) { diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index a9e450a..ebac2a0 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -333,7 +333,7 @@ export class DeeplakeFs implements IFileSystem { // 3. Session files: concatenate rows from sessions table if (this.sessionPaths.has(p) && this.sessionsTable) { const rows = await this.client.query( - `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` + `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); const text = joinSessionMessages(p, rows.map((row) => row["message"])); @@ -385,7 +385,7 @@ export class DeeplakeFs implements IFileSystem { // Session files: concatenate rows from sessions table, ordered by creation_date if (this.sessionPaths.has(p) && this.sessionsTable) { const rows = await this.client.query( - `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC` + `SELECT message FROM "${this.sessionsTable}" WHERE path = '${esc(p)}' ORDER BY creation_date ASC, turn_index ASC` ); if (rows.length === 0) throw fsErr("ENOENT", "no such file or directory", p); const text = joinSessionMessages(p, rows.map((row) => row["message"])); diff --git a/src/tools/backfill-locomo-memory.ts b/src/tools/backfill-locomo-memory.ts 
index 0cab6eb..50eb9c1 100644 --- a/src/tools/backfill-locomo-memory.ts +++ b/src/tools/backfill-locomo-memory.ts @@ -15,6 +15,14 @@ const execFileAsync = promisify(execFile); interface SessionRow { path: string; filename: string; + creation_date?: string; + source_date_time?: string; + turn_index?: number; + dia_id?: string; + speaker?: string; + text?: string; + turn_summary?: string; + event_type?: string; message: unknown; } @@ -84,7 +92,7 @@ function parseSessionPayload(raw: unknown): Record { return { raw }; } -function buildSessionTask(row: SessionRow): SessionTask { +function buildSessionTaskFromBlob(row: SessionRow): SessionTask { const sessionId = basename(row.path).replace(/\.[^.]+$/, ""); const summaryFilename = `${sessionId}_summary.md`; const summaryPath = `/summaries/locomo/${summaryFilename}`; @@ -138,6 +146,51 @@ function buildSessionTask(row: SessionRow): SessionTask { }; } +function buildSessionTaskFromRows(rows: SessionRow[]): SessionTask { + if (rows.length === 0) throw new Error("buildSessionTaskFromRows requires at least one row"); + const sorted = [...rows].sort((a, b) => { + const turnA = typeof a.turn_index === "number" ? a.turn_index : Number.MAX_SAFE_INTEGER; + const turnB = typeof b.turn_index === "number" ? b.turn_index : Number.MAX_SAFE_INTEGER; + if (turnA !== turnB) return turnA - turnB; + return (a.creation_date ?? "").localeCompare(b.creation_date ?? ""); + }); + const first = sorted[0]; + const sessionId = basename(first.path).replace(/\.[^.]+$/, ""); + const summaryFilename = `${sessionId}_summary.md`; + const summaryPath = `/summaries/locomo/${summaryFilename}`; + const sessionDateTime = first.source_date_time ?? first.creation_date ?? 
null; + + const lines = [JSON.stringify({ + type: "session_meta", + session_id: sessionId, + source_path: first.path, + date_time: sessionDateTime, + })]; + + for (const row of sorted) { + if ((row.event_type && row.event_type !== "dialogue_turn") && !row.text) continue; + lines.push(JSON.stringify({ + type: row.event_type || "dialogue_turn", + session_id: sessionId, + date_time: row.source_date_time ?? row.creation_date ?? null, + turn_index: row.turn_index ?? null, + dia_id: row.dia_id ?? null, + speaker: row.speaker ?? null, + text: row.text ?? null, + summary: row.turn_summary ?? null, + })); + } + + return { + sessionId, + sourcePath: first.path, + summaryPath, + summaryFilename, + jsonlContent: `${lines.join("\n")}\n`, + jsonlLines: lines.length, + }; +} + function buildPrompt(task: SessionTask): string { return WIKI_PROMPT_TEMPLATE .replace(/__JSONL__/g, "__TMP_JSONL__") @@ -298,8 +351,9 @@ async function main(): Promise { const claudeBin = findClaudeBin(); const sessionRowsRaw = await api.query( - `SELECT path, filename, message FROM "${opts.sessionsTable}" ` + - `WHERE path LIKE '/sessions/conv_%_session_%.json%' ORDER BY path` + `SELECT path, filename, creation_date, source_date_time, turn_index, dia_id, speaker, text, turn_summary, event_type, message ` + + `FROM "${opts.sessionsTable}" WHERE path LIKE '/sessions/conv_%_session_%.json%' ` + + `ORDER BY path, creation_date, turn_index` ); const sessionRows = sessionRowsRaw .filter((row) => @@ -310,12 +364,32 @@ async function main(): Promise { .map((row) => ({ path: row["path"] as string, filename: row["filename"] as string, + creation_date: typeof row["creation_date"] === "string" ? row["creation_date"] as string : undefined, + source_date_time: typeof row["source_date_time"] === "string" ? row["source_date_time"] as string : undefined, + turn_index: typeof row["turn_index"] === "number" ? row["turn_index"] as number : undefined, + dia_id: typeof row["dia_id"] === "string" ? 
row["dia_id"] as string : undefined, + speaker: typeof row["speaker"] === "string" ? row["speaker"] as string : undefined, + text: typeof row["text"] === "string" ? row["text"] as string : undefined, + turn_summary: typeof row["turn_summary"] === "string" ? row["turn_summary"] as string : undefined, + event_type: typeof row["event_type"] === "string" ? row["event_type"] as string : undefined, message: row["message"], })) as SessionRow[]; - const allTasks = sessionRows - .filter((row) => typeof row.path === "string" && row.path.includes("/conv_")) - .map(buildSessionTask); + const grouped = new Map(); + for (const row of sessionRows) { + if (!row.path.includes("/conv_")) continue; + const list = grouped.get(row.path) ?? []; + list.push(row); + grouped.set(row.path, list); + } + + const allTasks = [...grouped.values()].map((rows) => { + const blobRow = rows.find((row) => { + const payload = parseSessionPayload(row.message); + return Array.isArray(payload["turns"]) || Array.isArray(payload["dialogue"]); + }); + return blobRow ? 
buildSessionTaskFromBlob(blobRow) : buildSessionTaskFromRows(rows); + }); let tasks = [...allTasks]; const tasksByPath = new Map(allTasks.map((task) => [task.summaryPath, task])); const expectedPaths = new Set(allTasks.map((task) => task.summaryPath)); diff --git a/src/tools/migrate-locomo-sessions.ts b/src/tools/migrate-locomo-sessions.ts new file mode 100644 index 0000000..8406a13 --- /dev/null +++ b/src/tools/migrate-locomo-sessions.ts @@ -0,0 +1,238 @@ +#!/usr/bin/env node + +import { basename } from "node:path"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { buildSessionInsertSql, type QueuedSessionRow } from "../hooks/session-queue.js"; + +interface Args { + sessionsTable: string; + backupTable: string; + batchSize: number; + dryRun: boolean; +} + +interface SessionRowRecord extends Record { + id: string; + path: string; + filename: string; + message: unknown; + author: string; + size_bytes: number; + project: string; + description: string; + agent: string; + creation_date: string; + last_update_date: string; +} + +const LOCOMO_PATH_FILTER = `/sessions/conv_%_session_%.json%`; + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + sessionsTable: "sessions", + backupTable: "sessions_locomo_blob_backup", + batchSize: 100, + dryRun: false, + }; + + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--sessions-table": + opts.sessionsTable = args[++i] ?? opts.sessionsTable; + break; + case "--backup-table": + opts.backupTable = args[++i] ?? 
opts.backupTable; + break; + case "--batch-size": + opts.batchSize = Math.max(1, Number(args[++i]) || opts.batchSize); + break; + case "--dry-run": + opts.dryRun = true; + break; + } + } + return opts; +} + +function parseJson(value: unknown): Record | null { + if (typeof value === "string") { + try { return JSON.parse(value) as Record; } catch { return null; } + } + return value && typeof value === "object" ? value as Record : null; +} + +function extractString(value: unknown): string { + return typeof value === "string" ? value : value == null ? "" : String(value); +} + +function extractNumber(value: unknown): number { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return 0; +} + +function isTranscriptBlob(row: SessionRowRecord): boolean { + const parsed = parseJson(row.message); + return !!parsed && (Array.isArray(parsed["turns"]) || Array.isArray(parsed["dialogue"])); +} + +function normalizeMessageJson(value: unknown): string { + try { + return JSON.stringify(typeof value === "string" ? JSON.parse(value) : value); + } catch { + return JSON.stringify({ type: "raw_message", content: String(value ?? 
"") }); + } +} + +function toQueuedRowFromExisting(row: SessionRowRecord): QueuedSessionRow { + const message = normalizeMessageJson(row.message); + return { + id: extractString(row.id), + path: extractString(row.path), + filename: extractString(row.filename), + message, + sessionId: extractString(parseJson(row.message)?.["session_id"]) || basename(extractString(row.path)).replace(/\.[^.]+$/, ""), + eventType: extractString(parseJson(row.message)?.["type"]), + turnIndex: extractNumber(parseJson(row.message)?.["turn_index"]), + diaId: extractString(parseJson(row.message)?.["dia_id"]), + speaker: extractString(parseJson(row.message)?.["speaker"]), + text: extractString(parseJson(row.message)?.["text"]) || extractString(parseJson(row.message)?.["content"]), + turnSummary: extractString(parseJson(row.message)?.["summary"]) || extractString(parseJson(row.message)?.["message_summary"]), + sourceDateTime: extractString(parseJson(row.message)?.["date_time"]) || extractString(parseJson(row.message)?.["source_date_time"]), + author: extractString(row.author), + sizeBytes: extractNumber(row.size_bytes), + project: extractString(row.project), + description: extractString(row.description), + agent: extractString(row.agent), + creationDate: extractString(row.creation_date), + lastUpdateDate: extractString(row.last_update_date), + }; +} + +function explodeTranscriptRow(row: SessionRowRecord): QueuedSessionRow[] { + const parsed = parseJson(row.message); + if (!parsed) return []; + const turns = Array.isArray(parsed["turns"]) + ? parsed["turns"] as Array> + : Array.isArray(parsed["dialogue"]) + ? 
parsed["dialogue"] as Array> + : []; + const sessionId = basename(extractString(row.path)).replace(/\.[^.]+$/, ""); + const sourceDateTime = extractString(parsed["date_time"]) || extractString(parsed["date"]) || extractString(row.creation_date); + + return turns.map((turn, index) => { + const messageObject = { + type: "dialogue_turn", + session_id: sessionId, + source_path: extractString(row.path), + conversation_id: parsed["conversation_id"] ?? null, + session_number: parsed["session_number"] ?? null, + date_time: sourceDateTime || null, + turn_index: index + 1, + dia_id: turn["dia_id"] ?? null, + speaker: turn["speaker"] ?? turn["name"] ?? null, + text: turn["text"] ?? turn["content"] ?? null, + summary: turn["summary"] ?? turn["message_summary"] ?? null, + }; + const message = JSON.stringify(messageObject); + return { + id: crypto.randomUUID(), + path: extractString(row.path), + filename: extractString(row.filename), + message, + sessionId, + eventType: "dialogue_turn", + turnIndex: index + 1, + diaId: extractString(turn["dia_id"]), + speaker: extractString(turn["speaker"]) || extractString(turn["name"]), + text: extractString(turn["text"]) || extractString(turn["content"]), + turnSummary: extractString(turn["summary"]) || extractString(turn["message_summary"]), + sourceDateTime, + author: extractString(row.author) || "locomo", + sizeBytes: Buffer.byteLength(message, "utf-8"), + project: extractString(row.project) || "locomo", + description: "dialogue_turn", + agent: extractString(row.agent) || "claude_code", + creationDate: extractString(row.creation_date) || sourceDateTime, + lastUpdateDate: extractString(row.last_update_date) || extractString(row.creation_date) || sourceDateTime, + }; + }); +} + +async function insertRows(api: DeeplakeApi, table: string, rows: QueuedSessionRow[], batchSize: number): Promise { + for (let i = 0; i < rows.length; i += batchSize) { + await api.query(buildSessionInsertSql(table, rows.slice(i, i + batchSize))); + } +} + +async 
function main(): Promise { + const opts = parseArgs(); + const creds = loadCredentials(); + if (!creds?.token) { + throw new Error("No Deeplake credentials found. Run hivemind login first."); + } + + const api = new DeeplakeApi( + creds.token, + creds.apiUrl ?? "https://api.deeplake.ai", + creds.orgId, + creds.workspaceId ?? "default", + opts.sessionsTable, + ); + + await api.ensureSessionsTable(opts.sessionsTable); + await api.ensureSessionsTable(opts.backupTable); + + const backupRows = await api.query( + `SELECT id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date ` + + `FROM "${opts.backupTable}" WHERE path LIKE '${LOCOMO_PATH_FILTER}' ORDER BY path, creation_date` + ) as SessionRowRecord[]; + + let sourceRows = backupRows; + if (sourceRows.length === 0) { + sourceRows = await api.query( + `SELECT id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date ` + + `FROM "${opts.sessionsTable}" WHERE path LIKE '${LOCOMO_PATH_FILTER}' ORDER BY path, creation_date` + ) as SessionRowRecord[]; + + if (sourceRows.length === 0) { + console.log("No LOCOMO session rows found to migrate."); + return; + } + + if (!opts.dryRun) { + console.log(`Backing up ${sourceRows.length} original LOCOMO session rows to "${opts.backupTable}"...`); + await insertRows(api, opts.backupTable, sourceRows.map(toQueuedRowFromExisting), opts.batchSize); + } + } + + const transcriptBlobRows = sourceRows.filter(isTranscriptBlob); + const migratedRows = transcriptBlobRows.flatMap(explodeTranscriptRow); + + console.log(`Workspace: ${creds.workspaceId ?? 
"default"} | Sessions table: ${opts.sessionsTable}`); + console.log(`Original LOCOMO blob rows: ${transcriptBlobRows.length}`); + console.log(`Expanded turn rows: ${migratedRows.length}`); + + if (opts.dryRun) return; + + console.log(`Deleting existing LOCOMO rows from "${opts.sessionsTable}"...`); + await api.query(`DELETE FROM "${opts.sessionsTable}" WHERE path LIKE '${LOCOMO_PATH_FILTER}'`); + + console.log(`Inserting ${migratedRows.length} migrated turn rows into "${opts.sessionsTable}"...`); + await insertRows(api, opts.sessionsTable, migratedRows, opts.batchSize); + + const finalRows = await api.query( + `SELECT path, COUNT(*) AS row_count FROM "${opts.sessionsTable}" WHERE path LIKE '${LOCOMO_PATH_FILTER}' GROUP BY path ORDER BY path` + ); + console.log(`Done. migrated_paths=${finalRows.length}`); +} + +main().catch((error) => { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}); From 5f45b04f2cfd1bd0834ba923a8fa7fc74be1e049 Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 21:33:53 -0700 Subject: [PATCH 4/7] embedding and graph experiments --- .gitignore | 3 + claude-code/bundle/capture.js | 283 +++- claude-code/bundle/commands/auth-login.js | 220 ++- claude-code/bundle/pre-tool-use.js | 401 +++++- claude-code/bundle/session-end.js | 283 +++- claude-code/bundle/session-start-setup.js | 225 ++- claude-code/bundle/session-start.js | 42 +- claude-code/bundle/shell/deeplake-shell.js | 220 ++- claude-code/bundle/wiki-worker.js | 596 ++++++++ .../tests/bash-command-compiler.test.ts | 25 +- claude-code/tests/deeplake-api.test.ts | 104 ++ claude-code/tests/embedding-text.test.ts | 61 + claude-code/tests/hooks-source.test.ts | 40 + claude-code/tests/knowledge-graph.test.ts | 67 + claude-code/tests/memory-facts.test.ts | 76 + codex/bundle/capture.js | 68 +- codex/bundle/commands/auth-login.js | 220 ++- codex/bundle/pre-tool-use.js | 401 +++++- codex/bundle/session-start-setup.js | 225 ++- 
codex/bundle/session-start.js | 44 +- codex/bundle/shell/deeplake-shell.js | 220 ++- codex/bundle/stop.js | 283 +++- codex/bundle/wiki-worker.js | 588 ++++++++ package-lock.json | 459 +++++- package.json | 5 + pyproject.toml | 14 + ...ackfill_harrier_embeddings.cpython-312.pyc | Bin 0 -> 60971 bytes ...ackfill_harrier_embeddings.cpython-314.pyc | Bin 0 -> 72656 bytes scripts/backfill_harrier_embeddings.py | 1256 +++++++++++++++++ scripts/requirements-harrier-embeddings.txt | 4 + src/config.ts | 10 + src/deeplake-api.ts | 228 ++- src/embeddings/harrier.ts | 166 +++ src/embeddings/text.ts | 138 ++ src/hooks/bash-command-compiler.ts | 350 ++++- src/hooks/codex/pre-tool-use.ts | 4 +- src/hooks/codex/session-start-setup.ts | 5 + src/hooks/codex/session-start.ts | 44 +- src/hooks/codex/spawn-wiki-worker.ts | 9 + src/hooks/codex/wiki-worker.ts | 86 ++ src/hooks/knowledge-graph.ts | 285 ++++ src/hooks/memory-facts.ts | 504 +++++++ src/hooks/pre-tool-use.ts | 4 +- src/hooks/session-start-setup.ts | 5 + src/hooks/session-start.ts | 42 +- src/hooks/spawn-wiki-worker.ts | 9 + src/hooks/wiki-worker.ts | 88 ++ src/tools/backfill-harrier-embeddings.ts | 433 ++++++ src/tools/backfill-locomo-facts.ts | 268 ++++ src/tools/backfill-locomo-graph.ts | 525 +++++++ uv.lock | 1004 +++++++++++++ vitest.config.ts | 6 + 52 files changed, 10493 insertions(+), 153 deletions(-) create mode 100644 claude-code/tests/embedding-text.test.ts create mode 100644 claude-code/tests/knowledge-graph.test.ts create mode 100644 claude-code/tests/memory-facts.test.ts create mode 100644 pyproject.toml create mode 100644 scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc create mode 100644 scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc create mode 100644 scripts/backfill_harrier_embeddings.py create mode 100644 scripts/requirements-harrier-embeddings.txt create mode 100644 src/embeddings/harrier.ts create mode 100644 src/embeddings/text.ts create mode 100644 
src/hooks/knowledge-graph.ts create mode 100644 src/hooks/memory-facts.ts create mode 100644 src/tools/backfill-harrier-embeddings.ts create mode 100644 src/tools/backfill-locomo-facts.ts create mode 100644 src/tools/backfill-locomo-graph.ts create mode 100644 uv.lock diff --git a/.gitignore b/.gitignore index 4f538ba..930a1ba 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ tmp/ .env.* coverage/ bench/ +.uv-cache/ +.venv/ +.venv-harrier/ .claude/ CLAUDE.md RESULTS-fast-path-all-commands.md diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 2eb5206..a62bd70 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -49,6 +49,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? 
summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -460,6 +485,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL 
DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + 
`evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/direct-run.js @@ -601,6 +813,60 @@ import { fileURLToPath as fileURLToPath2 } from "node:url"; import { dirname, join as join5 } from "node:path"; import { writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, appendFileSync as appendFileSync2 } from "node:fs"; 
import { homedir as homedir4, tmpdir as tmpdir2 } from "node:os"; + +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID3 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID4 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/spawn-wiki-worker.js var HOME = homedir4(); var WIKI_LOG = join5(HOME, ".claude", "hooks", "deeplake-wiki.log"); var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. 
This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. @@ -695,6 +961,11 @@ function spawnWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -702,7 +973,9 @@ function spawnWikiWorker(opts) { claudeBin: findClaudeBin(), wikiLog: WIKI_LOG, hooksDir: join5(HOME, ".claude", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); const workerPath = join5(bundleDir, "wiki-worker.js"); diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index e36fc73..0cc5d27 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -233,6 +233,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? 
"memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -277,6 +282,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -380,10 +401,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -406,9 +427,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? 
new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -635,6 +660,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) 
{ + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate 
TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes 
BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/commands/session-prune.js diff --git a/claude-code/bundle/pre-tool-use.js 
b/claude-code/bundle/pre-tool-use.js index 5f5340d..3891171 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -54,6 +54,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -101,6 +106,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -204,10 +225,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? 
e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -230,9 +251,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -459,6 +484,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT 
''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const 
[column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await 
this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + 
`project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/direct-run.js @@ -1870,6 +2082,22 @@ function extractPsqlQueryFromCommand(cmd) { function normalizeSqlRef(ref) { return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); } +var INTERCEPTED_SQL_REFS = /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" +]); function extractSqlTableRefs(query) { const refs = []; const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; @@ -1880,11 +2108,11 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); + return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref === 
"hivemind.memory" || ref === "hivemind.sessions"); + return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -1930,24 +2158,32 @@ function parsePsqlSegment(pipeline, tokens) { } return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; } -function normalizePsqlQuery(query, memoryTable, sessionsTable) { +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM 
"${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); return sql; } -function validatePsqlQuery(query, memoryTable, sessionsTable) { +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } - const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode only supports SELECT queries"); } - const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable]); + const allowedTables = /* @__PURE__ */ new Set([ + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + factsTable, + entitiesTable, + factEntityLinksTable + ]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { - throw new Error("psql query must reference memory or sessions"); + throw new Error("psql query must reference an intercepted hivemind memory table"); } for (const match of tableMatches) { if (!allowedTables.has(match[1])) { @@ -1956,6 +2192,135 @@ function validatePsqlQuery(query, memoryTable, sessionsTable) { } return sql; } +function decodeSqlLiteral(value) { + return value.replace(/''/g, "'").trim(); +} +function cleanSearchTerm(value) { + return decodeSqlLiteral(value).replace(/^%+|%+$/g, "").replace(/^_+|_+$/g, "").trim(); +} +function extractSqlSearchTerms(query) { + const terms = []; + const push = (value) => { + const cleaned = 
cleanSearchTerm(value); + if (!cleaned) + return; + if (cleaned.startsWith("/")) + return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) + return; + if (!terms.includes(cleaned)) + terms.push(cleaned); + }; + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} +function chooseEntityTerms(terms) { + const entityLike = terms.filter((term) => /[A-Z]/.test(term) && !/^\d+$/.test(term) && term.split(/\s+/).length <= 4); + return (entityLike.length > 0 ? entityLike : terms).slice(0, 2); +} +function escapeRegex2(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`); + const nodeTextClauses = topicTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeEntityClauses = entityTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeTopicClauses = topicTerms.map((term) => `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")})` : topicTerms.length > 0 ? 
`(${nodeTextClauses.join(" OR ")})` : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${edgeTopicClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")})` : "FALSE"; + const sql = `WITH node_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphNodesTable}" WHERE ${nodeWhere} ORDER BY score DESC LIMIT 8), edge_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphEdgesTable}" WHERE ${edgeWhere} ORDER BY score DESC LIMIT 8) SELECT source_session_id, source_path, search_text, score FROM ( SELECT source_session_id, source_path, search_text, score FROM node_candidates UNION ALL SELECT source_session_id, source_path, search_text, score FROM edge_candidates ) AS graph_candidates ORDER BY score ASC LIMIT 12`; + const rows = await api.query(sql); + const expanded = []; + const seen = /* @__PURE__ */ new Set(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...searchText.match(/conv_\d+_session_\d+/g) ?? [], + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "" + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? [], + typeof row["source_path"] === "string" ? 
row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`) + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) + continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) + continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) + return expanded; + } + } + return expanded; +} +function prependCtes(sql, ctes) { + if (ctes.length === 0) + return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} +function rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias) { + let rewritten = sql; + if (restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + } + if (restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + } + return rewritten; +} +async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable) { + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory2 = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); 
+ const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + if (!touchesMemory2 && !touchesSessions) + return sql; + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) + return sql; + const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + if (candidates.length === 0 || candidates.length > 16) + return sql; + const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})` + ]; + let restrictedMemoryAlias = null; + let restrictedSessionsAlias = null; + if (touchesMemory2) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), ctes); +} function formatPsqlValue(value) { if (value === null || value === void 0) return ""; @@ -2258,7 +2623,11 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); + const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; + const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges"; + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const rows = await api.query(prepared); const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); @@ -2518,10 +2887,10 @@ function needsHivemindPsqlRewrite(cmd) { return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); } function buildPsqlOnlyGuidance() { - return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the memory and sessions tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance() { - return "[RETRY REQUIRED] Only psql SELECT queries over memory and sessions are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; + return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. 
Rewrite the query to reference only those tables with normal psql SELECT syntax."; } var log4 = (msg) => log("pre", msg); var __bundleDir = dirname(fileURLToPath2(import.meta.url)); diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index e31f36b..23113b7 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -49,6 +49,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? 
summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -460,6 +485,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL 
DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + 
`evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/direct-run.js @@ -482,6 +694,60 @@ import { fileURLToPath as fileURLToPath2 } from "node:url"; import { dirname, join as join4 } from "node:path"; import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; 
import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; + +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID3 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID4 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/spawn-wiki-worker.js var HOME = homedir3(); var WIKI_LOG = join4(HOME, ".claude", "hooks", "deeplake-wiki.log"); var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. 
This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. @@ -576,6 +842,11 @@ function spawnWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -583,7 +854,9 @@ function spawnWikiWorker(opts) { claudeBin: findClaudeBin(), wikiLog: WIKI_LOG, hooksDir: join4(HOME, ".claude", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); const workerPath = join4(bundleDir, "wiki-worker.js"); diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index c1b7371..f1be9cd 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -61,6 +61,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? 
"memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -114,6 +119,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -217,10 +238,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -243,9 +264,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? 
new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -472,6 +497,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) 
{ + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate 
TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes 
BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/stdin.js @@ -951,6 +1163,11 @@ async function runSessionStartSetup(input, deps 
= {}) { } else { try { await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); const drain = await drainSessionQueuesFn(api, { sessionsTable: config.sessionsTableName }); diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index 938a8b2..ad55e71 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -266,25 +266,41 @@ var CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For this run, Available Deeplake tables: - memory(path, summary, project, description, creation_date, last_update_date) - sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) Use this command shape: - psql -At -F '|' -c "SELECT ..." SQL strategy: -1. Start with targeted SELECTs against memory to find likely summaries. +1. Start with targeted SELECTs against memory to find likely sessions or summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. -3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. -4. 
If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. -5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. -6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. -7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -9. If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. -10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. After finding candidate summary rows, re-query memory by exact path. +5. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. +6. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. 
If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary before concluding the data is absent. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over a paraphrased summary label. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". 
Good query patterns: - Candidate summaries: psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: @@ -297,7 +313,12 @@ Good query patterns: Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. 
+- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. Answer rules: - Return the smallest exact answer supported by the data. @@ -305,11 +326,12 @@ Answer rules: - Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. - If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. - If a summary says something vague like "home country", search sessions for the exact named place before answering. - For list or profile questions, aggregate across the small set of candidate sessions before answering. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. -IMPORTANT: Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. +IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. 
Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 14f7835..819d244 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66752,6 +66752,11 @@ function loadConfig() { apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") }; } @@ -66799,6 +66804,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? 
summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66902,10 +66923,10 @@ var DeeplakeApi = class { }); } catch (e6) { if (isTimeoutError(e6)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e6 }); throw lastError; } - lastError = e6 instanceof Error ? e6 : new Error(String(e6)); + lastError = e6 instanceof Error ? new DeeplakeQueryError(e6.message, { sql, cause: e6 }) : new DeeplakeQueryError(String(e6), { sql, cause: e6 }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -66928,9 +66949,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -67157,6 +67182,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL 
DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + 
`evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/shell/deeplake-fs.js diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index 19c1150..10580ce 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -165,6 +165,527 @@ async function uploadSummary(query2, 
params) { return { path: "insert", sql, descLength: desc.length, summaryLength: text.length }; } +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString(value) { + return typeof value === "string" ? 
value.trim() : ""; +} +function normalizeAliasList(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function parseGraphExtraction(raw) { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned); + const nodes = Array.isArray(parsed["nodes"]) ? parsed["nodes"] : []; + const edges = Array.isArray(parsed["edges"]) ? parsed["edges"] : []; + return { + nodes: nodes.map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]) + })).filter((node) => node.name), + edges: edges.map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]) + })).filter((edge) => edge.source && edge.target && edge.relation) + }; +} +function slugify(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildGraphNodeId(name, _type = "other") { + return `entity:${slugify(name)}`; +} +function buildNodeSearchText(node) { + return [ + node.name, + node.type ?? "other", + ...node.aliases ?? [], + node.summary ?? "" + ].filter(Boolean).join(" | "); +} +function buildEdgeSearchText(edge, sourceNodeId, targetNodeId) { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId + ].filter(Boolean).join(" | "); +} +function buildKnowledgeGraphPrompt(args) { + return (args.template ?? 
GRAPH_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +function wrapGraphPhaseError(error, args) { + const wrapped = new Error(`graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +async function replaceSessionGraph(params) { + const ts = params.ts ?? (/* @__PURE__ */ new Date()).toISOString(); + const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`; + const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`; + const nodeFilename = `${params.sessionId}.jsonl`; + const edgeFilename = `${params.sessionId}.jsonl`; + const nodeMap = /* @__PURE__ */ new Map(); + for (const node of params.graph.nodes) { + const key = buildGraphNodeId(node.name, node.type); + nodeMap.set(key, { + name: node.name, + type: node.type || "other", + summary: node.summary || "", + aliases: node.aliases || [] + }); + } + for (const edge of params.graph.edges) { + const sourceKey = buildGraphNodeId(edge.source); + const targetKey = buildGraphNodeId(edge.target); + if (!nodeMap.has(sourceKey)) + nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] }); + if (!nodeMap.has(targetKey)) + nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] }); + } + const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: 
"delete_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: deleteNodesSql + }); + } + try { + await params.query(deleteEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: deleteEdgesSql + }); + } + const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => { + const summary = node.summary || buildSummaryBlurb(`# Graph Node + +${node.name}`); + const aliases = (node.aliases ?? []).join(", "); + const searchText = buildNodeSearchText(node); + return `('${randomUUID2()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', '${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', '${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (nodeRows.length > 0) { + const insertNodesSql = `INSERT INTO "${params.nodesTable}" (id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${nodeRows.join(", ")}`; + try { + await params.query(insertNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: insertNodesSql + }); + } + } + const edgeRows = params.graph.edges.map((edge) => { + const sourceNodeId = buildGraphNodeId(edge.source); + const targetNodeId = buildGraphNodeId(edge.target); + const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId); + const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`; + const evidence = edge.evidence || ""; + const edgeId = 
`${sourceNodeId}:${edge.relation}:${targetNodeId}`; + return `('${randomUUID2()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', '${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (edgeRows.length > 0) { + const insertEdgesSql = `INSERT INTO "${params.edgesTable}" (id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${edgeRows.join(", ")}`; + try { + await params.query(insertEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: insertEdgesSql + }); + } + } + return { nodes: nodeRows.length, edges: edgeRows.length }; +} + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences2(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? 
fenceMatch[1].trim() : trimmed; +} +function normalizeString2(value) { + return typeof value === "string" ? value.trim() : ""; +} +function normalizeAliases(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString2).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function normalizeFactType(value) { + return normalizeString2(value) || "other"; +} +function normalizeConfidence(value) { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return Math.max(0, Math.min(1, parsed)); + } + return void 0; +} +function slugify2(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildFactId(sessionId, fact, index) { + return [ + "fact", + slugify2(sessionId), + String(index + 1), + slugify2(fact.subject), + slugify2(fact.predicate), + slugify2(fact.object) + ].join(":"); +} +function buildFactSearchText(fact) { + return [ + fact.subject, + ...fact.subjectAliases ?? [], + fact.predicate, + fact.object, + ...fact.objectAliases ?? [], + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? 
"" + ].filter(Boolean).join(" | "); +} +function buildEntitySearchText(entity) { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries + ].filter(Boolean).join(" | "); +} +function mergeDelimited(existing, nextValues) { + const merged = new Set(existing.split(",").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} +function mergePipeDelimited(existing, nextValues, maxItems = 8) { + const merged = new Set(existing.split("|").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + if (merged.has(trimmed)) + continue; + if (merged.size >= maxItems) + break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} +function wrapFactsPhaseError(error, args) { + const wrapped = new Error(`facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +function buildEntityAggregate(entityMap, args) { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) + existing.aliases.add(alias); + if (args.summary) + existing.summaries.add(args.summary); + if (args.searchText) + existing.searchTerms.add(args.searchText); + return existing; + } + const created = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? 
[args.searchText] : []) + }; + entityMap.set(entityId, created); + return created; +} +async function upsertEntities(params) { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query(`SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`); + if (existingRows.length === 0) { + const insertSql = `INSERT INTO "${params.entitiesTable}" (id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${randomUUID3()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', '${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? ""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? 
""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? ""), [params.sourcePath]); + const existingType = normalizeString2(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = `UPDATE "${params.entitiesTable}" SET canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} +function parseMemoryFactExtraction(raw) { + const cleaned = stripCodeFences2(raw); + const parsed = JSON.parse(cleaned); + const facts = Array.isArray(parsed["facts"]) ? 
parsed["facts"] : []; + const dedupe = /* @__PURE__ */ new Set(); + return { + facts: facts.map((fact) => ({ + subject: normalizeString2(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString2(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString2(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString2(fact["summary"]), + evidence: normalizeString2(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString2(fact["valid_at"]), + validFrom: normalizeString2(fact["valid_from"]), + validTo: normalizeString2(fact["valid_to"]) + })).filter((fact) => fact.subject && fact.predicate && fact.object).filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) + return false; + dedupe.add(key); + return true; + }) + }; +} +function buildMemoryFactPrompt(args) { + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +async function replaceSessionFacts(params) { + const ts = params.ts ?? 
(/* @__PURE__ */ new Date()).toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql + }); + } + const entityMap = /* @__PURE__ */ new Map(); + const factRows = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? 
[], + summary, + searchText + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? "" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "" + }; + }); + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}` + }); + } + if (factRows.length > 0) { + const values = factRows.map((row) => `('${randomUUID3()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', '${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', '${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', '${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertFactsSql = `INSERT INTO "${params.factsTable}" (id, path, filename, fact_id, subject_entity_id, subject_name, subject_type, predicate, 
object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql + }); + } + } + const linkRows = factRows.flatMap((row) => [ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject" + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object" + } + ]); + if (linkRows.length > 0) { + const values = linkRows.map((row) => `('${randomUUID3()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', '${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, '${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertLinksSql = `INSERT INTO "${params.linksTable}" (id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql + }); + } + } + return { + facts: factRows.length, + entities: entityMap.size, + links: linkRows.length + }; +} + // dist/src/hooks/wiki-worker.js var cfg = JSON.parse(readFileSync2(process.argv[2], 
"utf-8")); var tmpDir = cfg.tmpDir; @@ -279,6 +800,81 @@ async function main() { text }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate + }); + const graphRaw = execFileSync(cfg.claudeBin, [ + "-p", + graphPrompt, + "--no-session-persistence", + "--model", + "haiku", + "--permission-mode", + "bypassPermissions" + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + graph + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e) { + wlog(`graph update failed: ${e.message}`); + } + try { + const factPrompt = buildMemoryFactPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate + }); + const factsRaw = execFileSync(cfg.claudeBin, [ + "-p", + factPrompt, + "--no-session-persistence", + "--model", + "haiku", + "--permission-mode", + "bypassPermissions" + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + 
project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + extraction + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e) { + wlog(`fact update failed: ${e.message}`); + } try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index e1cfd4f..f879330 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -317,6 +317,22 @@ describe("bash-command-compiler parsing", () => { fieldSeparator: "|", }); + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT node_id, canonical_name, relation FROM graph_nodes JOIN graph_edges ON graph_edges.source_node_id = graph_nodes.node_id LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT node_id, canonical_name, relation FROM graph_nodes JOIN graph_edges ON graph_edges.source_node_id = graph_nodes.node_id LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + restorePsqlMode(); }); @@ -512,6 +528,9 @@ describe("bash-command-compiler execution", () => { it("executes psql queries against normalized memory and sessions table names", async () => { const query = vi.fn(async (sql: string) => { + if (sql.includes('FROM "graph_nodes"') || sql.includes('FROM "graph_edges"')) { + return []; + } expect(sql).toContain('FROM "memory_actual"'); expect(sql).toContain('JOIN "sessions_actual"'); return [ @@ -527,11 +546,15 @@ describe("bash-command-compiler execution", () => { "psql -At -F 
'|' -c \"SELECT m.path, m.summary FROM memory m JOIN sessions s ON s.path = m.path WHERE m.summary ILIKE '%Caroline%' LIMIT 1\"", ); expect(output).toBe("/summaries/locomo/conv_0_session_6_summary.md|Caroline keeps classic kids books"); + expect(query.mock.calls.some(([sql]) => String(sql).includes('FROM "memory_actual"'))).toBe(true); restorePsqlMode(); }); it("executes direct sessions queries against physical per-message rows", async () => { const query = vi.fn(async (sql: string) => { + if (sql.includes('FROM "graph_nodes"') || sql.includes('FROM "graph_edges"')) { + return []; + } expect(sql).toContain('FROM "sessions_actual"'); expect(sql).toContain("WHERE path = '/sessions/conv_0_session_8.json'"); return [ @@ -553,7 +576,7 @@ describe("bash-command-compiler execution", () => { "psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE path = '/sessions/conv_0_session_8.json' AND text ILIKE '%camp%' ORDER BY turn_index ASC LIMIT 1\"", ); expect(output).toBe("/sessions/conv_0_session_8.json|2023-08-10|1|Melanie|We planned a camping trip"); - expect(query).toHaveBeenCalledTimes(1); + expect(query.mock.calls.some(([sql]) => String(sql).includes('FROM "sessions_actual"'))).toBe(true); restorePsqlMode(); }); diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index 046e347..b1ca795 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -564,3 +564,107 @@ describe("DeeplakeApi.ensureSessionsTable", () => { expect(querySqls.some((sql) => sql.includes("CREATE INDEX IF NOT EXISTS"))).toBe(false); }); }); + +describe("DeeplakeApi graph tables", () => { + it("creates graph_nodes with searchable graph columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureGraphNodesTable("graph_nodes"); 
+ const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("graph_nodes"); + expect(createSql).toContain("node_id TEXT"); + expect(createSql).toContain("canonical_name TEXT"); + expect(createSql).toContain("search_text TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"node_id"`))).toBe(true); + }); + + it("creates graph_edges with relation columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureGraphEdgesTable("graph_edges"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; + expect(createSql).toContain("graph_edges"); + expect(createSql).toContain("source_node_id TEXT"); + expect(createSql).toContain("target_node_id TEXT"); + expect(createSql).toContain("relation TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"source_node_id", "target_node_id", "relation"`))).toBe(true); + }); +}); + +describe("DeeplakeApi fact tables", () => { + it("creates memory_facts with fact and temporal columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureFactsTable("memory_facts"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("memory_facts"); + expect(createSql).toContain("fact_id TEXT"); + expect(createSql).toContain("subject_entity_id TEXT"); + expect(createSql).toContain("predicate TEXT"); + expect(createSql).toContain("valid_from TEXT"); + expect(querySqls.some((sql) => sql.includes(`"fact_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"source_session_id", "predicate"`))).toBe(true); + }); + + it("creates memory_entities with canonical entity columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureEntitiesTable("memory_entities"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? 
JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? ""; + expect(createSql).toContain("memory_entities"); + expect(createSql).toContain("entity_id TEXT"); + expect(createSql).toContain("canonical_name TEXT"); + expect(createSql).toContain("aliases TEXT"); + expect(querySqls.some((sql) => sql.includes(`"entity_id"`))).toBe(true); + expect(querySqls.some((sql) => sql.includes(`"canonical_name"`))).toBe(true); + }); + + it("creates fact_entity_links with linking columns", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ tables: [] }), + }); + mockFetch.mockResolvedValue(jsonResponse({})); + const api = makeApi(); + await api.ensureFactEntityLinksTable("fact_entity_links"); + const querySqls = mockFetch.mock.calls + .map(([, init]) => init?.body ? JSON.parse(init.body).query : null) + .filter((sql): sql is string => typeof sql === "string"); + const createSql = querySqls.find((sql) => sql.includes("CREATE TABLE IF NOT EXISTS")) ?? 
""; + expect(createSql).toContain("fact_entity_links"); + expect(createSql).toContain("link_id TEXT"); + expect(createSql).toContain("fact_id TEXT"); + expect(createSql).toContain("entity_id TEXT"); + expect(querySqls.some((sql) => sql.includes(`"source_session_id", "entity_id", "entity_role"`))).toBe(true); + }); +}); diff --git a/claude-code/tests/embedding-text.test.ts b/claude-code/tests/embedding-text.test.ts new file mode 100644 index 0000000..72332a4 --- /dev/null +++ b/claude-code/tests/embedding-text.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from "vitest"; +import { + buildMemoryEmbeddingText, + buildSessionEmbeddingText, + stableEmbeddingSourceHash, +} from "../../src/embeddings/text.js"; + +describe("embedding text builders", () => { + it("builds a compact memory embedding payload", () => { + const text = buildMemoryEmbeddingText({ + path: "/summaries/alice/session.md", + filename: "session.md", + project: "hivemind", + description: "session summary", + summary: "Discussed local embeddings and retrieval quality.", + }); + + expect(text).toContain("Path: /summaries/alice/session.md"); + expect(text).toContain("Project: hivemind"); + expect(text).toContain("Summary: Discussed local embeddings and retrieval quality."); + }); + + it("builds a session embedding payload from structured turn columns", () => { + const text = buildSessionEmbeddingText({ + path: "/sessions/alice/demo.jsonl", + event_type: "dialogue_turn", + speaker: "user", + text: "Can we run Harrier locally in TypeScript?", + turn_summary: "Asked about local Harrier embeddings.", + source_date_time: "2026-04-20T10:00:00Z", + turn_index: 4, + }); + + expect(text).toContain("Event: dialogue_turn"); + expect(text).toContain("Speaker: user"); + expect(text).toContain("Text: Can we run Harrier locally in TypeScript?"); + expect(text).toContain("Turn summary: Asked about local Harrier embeddings."); + }); + + it("falls back to transcript extraction for session blobs", () => { + const 
text = buildSessionEmbeddingText({ + path: "/sessions/alice/transcript.json", + message: { + date_time: "2026-04-20", + turns: [ + { speaker: "user", text: "first turn" }, + { speaker: "assistant", text: "second turn" }, + ], + }, + }); + + expect(text).toContain("[user] first turn"); + expect(text).toContain("[assistant] second turn"); + expect(text).toContain("Date: 2026-04-20"); + }); + + it("hashes identical embedding sources deterministically", () => { + expect(stableEmbeddingSourceHash("same text")).toBe(stableEmbeddingSourceHash("same text")); + expect(stableEmbeddingSourceHash("same text")).not.toBe(stableEmbeddingSourceHash("different text")); + }); +}); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index 6345224..b6ab1f9 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -34,6 +34,11 @@ const baseConfig: Config = { apiUrl: "https://api.example.com", tableName: "memory", sessionsTableName: "sessions", + graphNodesTableName: "graph_nodes", + graphEdgesTableName: "graph_edges", + factsTableName: "memory_facts", + entitiesTableName: "memory_entities", + factEntityLinksTableName: "fact_entity_links", memoryPath: "/tmp/.deeplake/memory", }; @@ -756,7 +761,12 @@ describe("claude session start source", () => { expect(context).toContain("DEEPLAKE MEMORY SQL MODE"); expect(context).toContain("memory(path, summary"); expect(context).toContain("sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message)"); + expect(context).toContain("memory_facts(path, fact_id, subject_entity_id"); + expect(context).toContain("memory_entities(path, entity_id, canonical_name"); + expect(context).toContain("fact_entity_links(path, link_id, fact_id"); expect(context).toContain("psql -At -F '|'"); + expect(context).toContain("For stable person/project/place facts, use memory_facts first."); + expect(context).toContain("Graph-backed 
entity and relation resolution is applied automatically"); expect(context).toContain("Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time"); expect(context).toContain("Use sessions.message only when you need the raw JSON payload"); expect(context).toContain("Do not use filesystem commands"); @@ -813,6 +823,11 @@ describe("claude session start setup source", () => { const createApi = vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable: vi.fn(async () => undefined), + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), query: vi.fn(async () => []), }) as any); const placeholder = vi.fn(async () => undefined); @@ -838,6 +853,11 @@ describe("claude session start setup source", () => { createApi: vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable: vi.fn(async () => { throw new Error("403 Forbidden"); }), + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), query: vi.fn(async () => []), }) as any), isSessionWriteDisabledFn: vi.fn(() => false) as any, @@ -863,6 +883,11 @@ describe("claude session start setup source", () => { createApi: vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable: vi.fn(async () => undefined), + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), query: vi.fn(async () => []), }) as any), 
drainSessionQueuesFn: vi.fn(async () => ({ @@ -917,6 +942,11 @@ describe("claude session start setup source", () => { createApi: vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable, + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), query: vi.fn(async () => []), }) as any), isSessionWriteDisabledFn: vi.fn(() => false) as any, @@ -969,6 +999,11 @@ describe("claude session start setup source", () => { createApi: vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable: vi.fn(async () => { throw new Error("boom"); }), + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), }) as any), isSessionWriteDisabledFn: vi.fn(() => false) as any, isSessionWriteAuthErrorFn: vi.fn(() => false) as any, @@ -993,6 +1028,11 @@ describe("claude session start setup source", () => { createApi: vi.fn(() => ({ ensureTable: vi.fn(async () => undefined), ensureSessionsTable: vi.fn(async () => undefined), + ensureGraphNodesTable: vi.fn(async () => undefined), + ensureGraphEdgesTable: vi.fn(async () => undefined), + ensureFactsTable: vi.fn(async () => undefined), + ensureEntitiesTable: vi.fn(async () => undefined), + ensureFactEntityLinksTable: vi.fn(async () => undefined), }) as any), drainSessionQueuesFn: vi.fn(async () => ({ queuedSessions: 0, diff --git a/claude-code/tests/knowledge-graph.test.ts b/claude-code/tests/knowledge-graph.test.ts new file mode 100644 index 0000000..f341bc0 --- /dev/null +++ b/claude-code/tests/knowledge-graph.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, it } from "vitest"; 
+import { + buildGraphNodeId, + buildKnowledgeGraphPrompt, + parseGraphExtraction, + replaceSessionGraph, +} from "../../src/hooks/knowledge-graph.js"; + +describe("knowledge-graph", () => { + it("parses fenced JSON graph output", () => { + const graph = parseGraphExtraction(`\`\`\`json +{"nodes":[{"name":"Caroline","type":"person","summary":"Artist","aliases":["Caro"]}],"edges":[{"source":"Caroline","target":"Sweden","relation":"home_country","summary":"Caroline is from Sweden","evidence":"home country"}]} +\`\`\``); + expect(graph.nodes).toHaveLength(1); + expect(graph.edges).toHaveLength(1); + expect(graph.nodes[0].aliases).toEqual(["Caro"]); + expect(graph.edges[0].relation).toBe("home_country"); + }); + + it("uses stable canonical-name node ids", () => { + expect(buildGraphNodeId("Caroline")).toBe("entity:caroline"); + expect(buildGraphNodeId("Dr. Seuss")).toBe("entity:dr_seuss"); + }); + + it("builds a graph prompt with summary and source metadata", () => { + const prompt = buildKnowledgeGraphPrompt({ + summaryText: "# Session\n- **Source**: /sessions/x.json", + sessionId: "sess-1", + sourcePath: "/sessions/x.json", + project: "proj", + }); + expect(prompt).toContain("SESSION ID: sess-1"); + expect(prompt).toContain("SOURCE PATH: /sessions/x.json"); + expect(prompt).toContain("SUMMARY MARKDOWN:"); + }); + + it("replaces per-session node and edge rows using stable ids", async () => { + const calls: string[] = []; + const query = async (sql: string) => { + calls.push(sql); + return []; + }; + const result = await replaceSessionGraph({ + query, + nodesTable: "graph_nodes", + edgesTable: "graph_edges", + sessionId: "sess-1", + userName: "alice", + project: "proj", + agent: "claude_code", + sourcePath: "/sessions/alice/sess-1.jsonl", + graph: { + nodes: [{ name: "Caroline", type: "person", summary: "Artist", aliases: ["Caro"] }], + edges: [{ source: "Caroline", target: "Sweden", relation: "home_country", summary: "Caroline is from Sweden", evidence: "home country" 
}], + }, + ts: "2026-01-01T00:00:00.000Z", + }); + expect(result).toEqual({ nodes: 2, edges: 1 }); + expect(calls[0]).toContain('DELETE FROM "graph_nodes"'); + expect(calls[1]).toContain('DELETE FROM "graph_edges"'); + expect(calls[2]).toContain('INSERT INTO "graph_nodes"'); + expect(calls[2]).toContain("entity:caroline"); + expect(calls[2]).toContain("entity:sweden"); + expect(calls[3]).toContain('INSERT INTO "graph_edges"'); + expect(calls[3]).toContain("home_country"); + }); +}); diff --git a/claude-code/tests/memory-facts.test.ts b/claude-code/tests/memory-facts.test.ts new file mode 100644 index 0000000..c3889de --- /dev/null +++ b/claude-code/tests/memory-facts.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from "vitest"; +import { + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "../../src/hooks/memory-facts.js"; + +describe("memory-facts", () => { + it("parses fenced JSON fact output and normalizes predicates", () => { + const extraction = parseMemoryFactExtraction(`\`\`\`json +{"facts":[{"subject":"Caroline","subject_type":"person","predicate":"Home Country","object":"Sweden","object_type":"place","summary":"Caroline's home country is Sweden","evidence":"home country","confidence":0.92}]} +\`\`\``); + expect(extraction.facts).toHaveLength(1); + expect(extraction.facts[0].predicate).toBe("home_country"); + expect(extraction.facts[0].confidence).toBe(0.92); + }); + + it("builds a fact prompt with summary and source metadata", () => { + const prompt = buildMemoryFactPrompt({ + summaryText: "# Session\n- **Source**: /sessions/x.json", + sessionId: "sess-1", + sourcePath: "/sessions/x.json", + project: "proj", + }); + expect(prompt).toContain("SESSION ID: sess-1"); + expect(prompt).toContain("SOURCE PATH: /sessions/x.json"); + expect(prompt).toContain("SUMMARY MARKDOWN:"); + }); + + it("replaces per-session fact rows and upserts canonical entities", async () => { + const calls: string[] = []; + const query = async 
(sql: string) => { + calls.push(sql); + if (sql.includes('FROM "memory_entities"')) return []; + return []; + }; + const result = await replaceSessionFacts({ + query, + factsTable: "memory_facts", + entitiesTable: "memory_entities", + linksTable: "fact_entity_links", + sessionId: "sess-1", + userName: "alice", + project: "proj", + agent: "claude_code", + sourcePath: "/sessions/alice/sess-1.jsonl", + extraction: { + facts: [ + { + subject: "Caroline", + subjectType: "person", + predicate: "home_country", + object: "Sweden", + objectType: "place", + summary: "Caroline's home country is Sweden", + evidence: "home country", + confidence: 0.92, + }, + ], + }, + ts: "2026-01-01T00:00:00.000Z", + }); + expect(result).toEqual({ facts: 1, entities: 2, links: 2 }); + expect(calls[0]).toContain('DELETE FROM "memory_facts"'); + expect(calls[1]).toContain('DELETE FROM "fact_entity_links"'); + expect(calls.some((sql) => sql.includes('INSERT INTO "memory_entities"'))).toBe(true); + expect(calls.some((sql) => sql.includes('INSERT INTO "memory_facts"'))).toBe(true); + expect(calls.some((sql) => sql.includes('INSERT INTO "fact_entity_links"'))).toBe(true); + expect(calls.join("\n")).toContain("entity:caroline"); + expect(calls.join("\n")).toContain("entity:sweden"); + const linkInsert = calls.find((sql) => sql.includes('INSERT INTO "fact_entity_links"')); + expect(linkInsert).toContain("fact:sess_1:1:caroline:home_country:sweden"); + expect(linkInsert).toContain("'fact:sess_1:1:caroline:home_country:sweden', 'entity:caroline', 'subject'"); + expect(linkInsert).toContain("'fact:sess_1:1:caroline:home_country:sweden', 'entity:sweden', 'object'"); + }); +}); diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 30fce82..e8b7bfa 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -49,6 +49,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? 
env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -205,6 +210,60 @@ import { fileURLToPath as fileURLToPath2 } from "node:url"; import { dirname, join as join4 } from "node:path"; import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; import { homedir as homedir4, tmpdir } from "node:os"; + +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir4(); var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. 
This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. @@ -296,6 +355,11 @@ function spawnCodexWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -303,7 +367,9 @@ function spawnCodexWikiWorker(opts) { codexBin: findCodexBin(), wikiLog: WIKI_LOG, hooksDir: join4(HOME, ".codex", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); const workerPath = join4(bundleDir, "wiki-worker.js"); diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index e36fc73..0cc5d27 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -233,6 +233,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? 
"memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -277,6 +282,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -380,10 +401,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -406,9 +427,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? 
new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -635,6 +660,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) 
{ + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate 
TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes 
BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/commands/session-prune.js diff --git a/codex/bundle/pre-tool-use.js 
b/codex/bundle/pre-tool-use.js index 245a213..7502520 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -55,6 +55,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? 
e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -460,6 +485,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT 
''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const 
[column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await 
this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + 
`project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/retrieval-mode.js @@ -1857,6 +2069,22 @@ function extractPsqlQueryFromCommand(cmd) { function normalizeSqlRef(ref) { return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); } +var INTERCEPTED_SQL_REFS = /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" +]); function extractSqlTableRefs(query) { const refs = []; const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; @@ -1867,11 +2095,11 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions" || ref === "hivemind.memory" || ref === "hivemind.sessions"); + return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => ref === "memory" || ref === "sessions" || ref 
=== "hivemind.memory" || ref === "hivemind.sessions"); + return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -1917,24 +2145,32 @@ function parsePsqlSegment(pipeline, tokens) { } return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; } -function normalizePsqlQuery(query, memoryTable, sessionsTable) { +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM 
"${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); return sql; } -function validatePsqlQuery(query, memoryTable, sessionsTable) { +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } - const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode only supports SELECT queries"); } - const allowedTables = /* @__PURE__ */ new Set([memoryTable, sessionsTable]); + const allowedTables = /* @__PURE__ */ new Set([ + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + factsTable, + entitiesTable, + factEntityLinksTable + ]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { - throw new Error("psql query must reference memory or sessions"); + throw new Error("psql query must reference an intercepted hivemind memory table"); } for (const match of tableMatches) { if (!allowedTables.has(match[1])) { @@ -1943,6 +2179,135 @@ function validatePsqlQuery(query, memoryTable, sessionsTable) { } return sql; } +function decodeSqlLiteral(value) { + return value.replace(/''/g, "'").trim(); +} +function cleanSearchTerm(value) { + return decodeSqlLiteral(value).replace(/^%+|%+$/g, "").replace(/^_+|_+$/g, "").trim(); +} +function extractSqlSearchTerms(query) { + const terms = []; + const push = (value) => { + const cleaned = 
cleanSearchTerm(value); + if (!cleaned) + return; + if (cleaned.startsWith("/")) + return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) + return; + if (!terms.includes(cleaned)) + terms.push(cleaned); + }; + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} +function chooseEntityTerms(terms) { + const entityLike = terms.filter((term) => /[A-Z]/.test(term) && !/^\d+$/.test(term) && term.split(/\s+/).length <= 4); + return (entityLike.length > 0 ? entityLike : terms).slice(0, 2); +} +function escapeRegex2(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`); + const nodeTextClauses = topicTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeEntityClauses = entityTerms.map((term) => `search_text ILIKE '%${sqlLike(term)}%'`); + const edgeTopicClauses = topicTerms.map((term) => `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${nodeEntityClauses.join(" OR ")})` : topicTerms.length > 0 ? 
`(${nodeTextClauses.join(" OR ")})` : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` : topicTerms.length > 0 ? `(${edgeTopicClauses.join(" OR ")})` : entityTerms.length > 0 ? `(${edgeEntityClauses.join(" OR ")})` : "FALSE"; + const sql = `WITH node_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphNodesTable}" WHERE ${nodeWhere} ORDER BY score DESC LIMIT 8), edge_candidates AS ( SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score FROM "${graphEdgesTable}" WHERE ${edgeWhere} ORDER BY score DESC LIMIT 8) SELECT source_session_id, source_path, search_text, score FROM ( SELECT source_session_id, source_path, search_text, score FROM node_candidates UNION ALL SELECT source_session_id, source_path, search_text, score FROM edge_candidates ) AS graph_candidates ORDER BY score ASC LIMIT 12`; + const rows = await api.query(sql); + const expanded = []; + const seen = /* @__PURE__ */ new Set(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...searchText.match(/conv_\d+_session_\d+/g) ?? [], + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "" + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? [], + typeof row["source_path"] === "string" ? 
row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`) + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) + continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) + continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) + return expanded; + } + } + return expanded; +} +function prependCtes(sql, ctes) { + if (ctes.length === 0) + return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} +function rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias) { + let rewritten = sql; + if (restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + } + if (restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + } + return rewritten; +} +async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable) { + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory2 = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); 
+ const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + if (!touchesMemory2 && !touchesSessions) + return sql; + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) + return sql; + const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + if (candidates.length === 0 || candidates.length > 16) + return sql; + const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})` + ]; + let restrictedMemoryAlias = null; + let restrictedSessionsAlias = null; + if (touchesMemory2) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), ctes); +} function formatPsqlValue(value) { if (value === null || value === void 0) return ""; @@ -2245,7 +2610,11 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); + const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; + const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges"; + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const rows = await api.query(prepared); const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); @@ -2525,10 +2894,10 @@ function buildUnsupportedGuidance() { return `This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.`; } function buildPsqlOnlyGuidance() { - return "Hivemind recall is SQL-only in this mode. Use psql with the memory and sessions tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; + return "Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance() { - return "Only psql SELECT queries over memory and sessions are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; + return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. 
Rewrite the query to reference only those tables with normal psql SELECT syntax."; } function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { try { diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 8cb1e70..6660cab 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -61,6 +61,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") }; } @@ -111,6 +116,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? 
summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -214,10 +235,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -240,9 +261,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -469,6 +494,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL 
DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + 
`evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/stdin.js @@ -948,6 +1160,11 @@ async function runCodexSessionStartSetup(input, deps = {}) { } else { try { await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await 
api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); const drain = await drainSessionQueuesFn(api, { sessionsTable: config.sessionsTableName }); diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index 59c5cf4..6707ee9 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -168,25 +168,41 @@ var CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use SQL only f Available tables: - memory(path, summary, project, description, creation_date, last_update_date) - sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) Use this command shape: - psql -At -F '|' -c "SELECT ..." Workflow: -1. Query memory first to identify likely summaries. +1. Query memory first to identify likely summaries and sessions. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. -3. Re-query memory by exact path for the small set of summary rows you selected. -4. Query sessions by exact path for transcript evidence or unresolved dates. -5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. -6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. -7. 
If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -9. If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. -10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. Re-query memory by exact path for the small candidate set you selected. +5. Query sessions by exact path for transcript evidence or unresolved dates. +6. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative, immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. 
For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased summary labels. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". Good query patterns: - Candidate summaries: psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" - Exact summary reread: psql -At -F '|' -c 
"SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: @@ -195,11 +211,18 @@ Good query patterns: psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" +- If graph entity lookup is sparse or semantically weak, retry with BM25 on graph nodes: + psql -At -F '|' -c "SELECT node_id, canonical_name, node_type, summary, source_session_id, source_path, search_text <#> ' ' AS score FROM graph_nodes ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. Answer rules: - Return the smallest exact answer supported by the data. 
@@ -207,11 +230,12 @@ Answer rules: - Do not answer "not found" until you have checked both memory and a likely sessions row. - Preserve direct relative-duration answers when they already match the question. - If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. - If a summary says something vague like "home country", search sessions for the exact named place before answering. - Aggregate across the small candidate set before answering profile or list questions. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. -Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; +Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; function buildCodexSessionStartContext(args) { const versionNotice = args.currentVersion ? ` Hivemind v${args.currentVersion}` : ""; diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 14f7835..819d244 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66752,6 +66752,11 @@ function loadConfig() { apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? 
"https://api.deeplake.ai", tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") }; } @@ -66799,6 +66804,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66902,10 +66923,10 @@ var DeeplakeApi = class { }); } catch (e6) { if (isTimeoutError(e6)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e6 }); throw lastError; } - lastError = e6 instanceof Error ? e6 : new Error(String(e6)); + lastError = e6 instanceof Error ? 
new DeeplakeQueryError(e6.message, { sql, cause: e6 }) : new DeeplakeQueryError(String(e6), { sql, cause: e6 }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -66928,9 +66949,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. */ @@ -67157,6 +67182,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT 
NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", 
`("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT 
NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/shell/deeplake-fs.js diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index e0d68ea..cda8f47 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -52,6 +52,11 @@ function loadConfig() { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? 
join(home, ".deeplake", "memory") }; } @@ -102,6 +107,22 @@ function traceSql(msg) { if (DEBUG_FILE_LOG) log2(msg); } +var DeeplakeQueryError = class extends Error { + sqlSummary; + status; + responseBody; + sql; + cause; + constructor(message, args = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +}; var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -205,10 +226,10 @@ var DeeplakeApi = class { }); } catch (e) { if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error ? new DeeplakeQueryError(e.message, { sql, cause: e }) : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -231,9 +252,13 @@ var DeeplakeApi = class { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4e3) + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── /** Queue rows for writing. Call commit() to flush. 
*/ @@ -460,6 +485,193 @@ var DeeplakeApi = class { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + async ensureGraphNodesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + async ensureGraphEdgesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL 
DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`] + ]) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + async ensureFactsTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + 
`evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + async ensureEntitiesTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + 
`last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + async ensureFactEntityLinksTable(name) { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''` + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) + this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } }; // dist/src/utils/direct-run.js @@ -482,6 +694,60 @@ import { fileURLToPath as fileURLToPath2 } from "node:url"; import { dirname, join as join4 } from "node:path"; import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs"; 
import { homedir as homedir3, tmpdir as tmpdir2 } from "node:os"; + +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID3 } from "node:crypto"; + +// dist/src/hooks/upload-summary.js +import { randomUUID as randomUUID2 } from "node:crypto"; + +// dist/src/hooks/knowledge-graph.js +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID4 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; + +// dist/src/hooks/codex/spawn-wiki-worker.js var HOME = homedir3(); var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); var WIKI_PROMPT_TEMPLATE = `You are maintaining a persistent wiki from a session transcript. 
This page will become part of a long-lived knowledge base that future agents will search through index.md before opening the source session. Write for retrieval, not storytelling. @@ -573,6 +839,11 @@ function spawnCodexWikiWorker(opts) { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -580,7 +851,9 @@ function spawnCodexWikiWorker(opts) { codexBin: findCodexBin(), wikiLog: WIKI_LOG, hooksDir: join4(HOME, ".codex", "hooks"), - promptTemplate: WIKI_PROMPT_TEMPLATE + promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); const workerPath = join4(bundleDir, "wiki-worker.js"); diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index 3b50821..99a2b9a 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -155,6 +155,527 @@ async function uploadSummary(query2, params) { return { path: "insert", sql, descLength: desc.length, summaryLength: text.length }; } +// dist/src/hooks/knowledge-graph.js +import { randomUUID as randomUUID2 } from "node:crypto"; +var GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. 
+ +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString(value) { + return typeof value === "string" ? value.trim() : ""; +} +function normalizeAliasList(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function parseGraphExtraction(raw) { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned); + const nodes = Array.isArray(parsed["nodes"]) ? 
parsed["nodes"] : []; + const edges = Array.isArray(parsed["edges"]) ? parsed["edges"] : []; + return { + nodes: nodes.map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]) + })).filter((node) => node.name), + edges: edges.map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]) + })).filter((edge) => edge.source && edge.target && edge.relation) + }; +} +function slugify(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildGraphNodeId(name, _type = "other") { + return `entity:${slugify(name)}`; +} +function buildNodeSearchText(node) { + return [ + node.name, + node.type ?? "other", + ...node.aliases ?? [], + node.summary ?? "" + ].filter(Boolean).join(" | "); +} +function buildEdgeSearchText(edge, sourceNodeId, targetNodeId) { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId + ].filter(Boolean).join(" | "); +} +function buildKnowledgeGraphPrompt(args) { + return (args.template ?? GRAPH_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +function wrapGraphPhaseError(error, args) { + const wrapped = new Error(`graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? 
error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +async function replaceSessionGraph(params) { + const ts = params.ts ?? (/* @__PURE__ */ new Date()).toISOString(); + const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`; + const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`; + const nodeFilename = `${params.sessionId}.jsonl`; + const edgeFilename = `${params.sessionId}.jsonl`; + const nodeMap = /* @__PURE__ */ new Map(); + for (const node of params.graph.nodes) { + const key = buildGraphNodeId(node.name, node.type); + nodeMap.set(key, { + name: node.name, + type: node.type || "other", + summary: node.summary || "", + aliases: node.aliases || [] + }); + } + for (const edge of params.graph.edges) { + const sourceKey = buildGraphNodeId(edge.source); + const targetKey = buildGraphNodeId(edge.target); + if (!nodeMap.has(sourceKey)) + nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] }); + if (!nodeMap.has(targetKey)) + nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] }); + } + const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: deleteNodesSql + }); + } + try { + await params.query(deleteEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "delete_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: deleteEdgesSql + }); + } + const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => { + 
const summary = node.summary || buildSummaryBlurb(`# Graph Node + +${node.name}`); + const aliases = (node.aliases ?? []).join(", "); + const searchText = buildNodeSearchText(node); + return `('${randomUUID2()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', '${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', '${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (nodeRows.length > 0) { + const insertNodesSql = `INSERT INTO "${params.nodesTable}" (id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${nodeRows.join(", ")}`; + try { + await params.query(insertNodesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_nodes", + sessionId: params.sessionId, + table: params.nodesTable, + sql: insertNodesSql + }); + } + } + const edgeRows = params.graph.edges.map((edge) => { + const sourceNodeId = buildGraphNodeId(edge.source); + const targetNodeId = buildGraphNodeId(edge.target); + const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId); + const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`; + const evidence = edge.evidence || ""; + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + return `('${randomUUID2()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', '${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', 
${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`; + }); + if (edgeRows.length > 0) { + const insertEdgesSql = `INSERT INTO "${params.edgesTable}" (id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${edgeRows.join(", ")}`; + try { + await params.query(insertEdgesSql); + } catch (error) { + throw wrapGraphPhaseError(error, { + phase: "insert_edges", + sessionId: params.sessionId, + table: params.edgesTable, + sql: insertEdgesSql + }); + } + } + return { nodes: nodeRows.length, edges: edgeRows.length }; +} + +// dist/src/hooks/memory-facts.js +import { randomUUID as randomUUID3 } from "node:crypto"; +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. 
+- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. +- Return no markdown, no prose, no code fences, only JSON.`; +function stripCodeFences2(text) { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} +function normalizeString2(value) { + return typeof value === "string" ? 
value.trim() : ""; +} +function normalizeAliases(value) { + if (!Array.isArray(value)) + return []; + return value.map(normalizeString2).filter(Boolean).filter((item, index, arr) => arr.indexOf(item) === index); +} +function normalizeFactType(value) { + return normalizeString2(value) || "other"; +} +function normalizeConfidence(value) { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) + return Math.max(0, Math.min(1, parsed)); + } + return void 0; +} +function slugify2(value) { + return value.normalize("NFKD").replace(/[^\w\s-]/g, "").trim().toLowerCase().replace(/[\s-]+/g, "_").replace(/^_+|_+$/g, "") || "item"; +} +function buildFactId(sessionId, fact, index) { + return [ + "fact", + slugify2(sessionId), + String(index + 1), + slugify2(fact.subject), + slugify2(fact.predicate), + slugify2(fact.object) + ].join(":"); +} +function buildFactSearchText(fact) { + return [ + fact.subject, + ...fact.subjectAliases ?? [], + fact.predicate, + fact.object, + ...fact.objectAliases ?? [], + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? 
"" + ].filter(Boolean).join(" | "); +} +function buildEntitySearchText(entity) { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries + ].filter(Boolean).join(" | "); +} +function mergeDelimited(existing, nextValues) { + const merged = new Set(existing.split(",").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} +function mergePipeDelimited(existing, nextValues, maxItems = 8) { + const merged = new Set(existing.split("|").map((value) => value.trim()).filter(Boolean)); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) + continue; + if (merged.has(trimmed)) + continue; + if (merged.size >= maxItems) + break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} +function wrapFactsPhaseError(error, args) { + const wrapped = new Error(`facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${error instanceof Error ? error.message : String(error)}`); + wrapped.cause = error; + wrapped.phase = args.phase; + wrapped.sessionId = args.sessionId; + wrapped.table = args.table; + wrapped.sql = args.sql; + return wrapped; +} +function buildEntityAggregate(entityMap, args) { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) + existing.aliases.add(alias); + if (args.summary) + existing.summaries.add(args.summary); + if (args.searchText) + existing.searchTerms.add(args.searchText); + return existing; + } + const created = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? 
[args.searchText] : []) + }; + entityMap.set(entityId, created); + return created; +} +async function upsertEntities(params) { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query(`SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`); + if (existingRows.length === 0) { + const insertSql = `INSERT INTO "${params.entitiesTable}" (id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${randomUUID3()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', '${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? ""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? 
""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? ""), [params.sourcePath]); + const existingType = normalizeString2(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = `UPDATE "${params.entitiesTable}" SET canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} +function parseMemoryFactExtraction(raw) { + const cleaned = stripCodeFences2(raw); + const parsed = JSON.parse(cleaned); + const facts = Array.isArray(parsed["facts"]) ? 
parsed["facts"] : []; + const dedupe = /* @__PURE__ */ new Set(); + return { + facts: facts.map((fact) => ({ + subject: normalizeString2(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString2(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString2(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString2(fact["summary"]), + evidence: normalizeString2(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString2(fact["valid_at"]), + validFrom: normalizeString2(fact["valid_from"]), + validTo: normalizeString2(fact["valid_to"]) + })).filter((fact) => fact.subject && fact.predicate && fact.object).filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) + return false; + dedupe.add(key); + return true; + }) + }; +} +function buildMemoryFactPrompt(args) { + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); +} +async function replaceSessionFacts(params) { + const ts = params.ts ?? 
(/* @__PURE__ */ new Date()).toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql + }); + } + const entityMap = /* @__PURE__ */ new Map(); + const factRows = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? 
[], + summary, + searchText + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? "" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "" + }; + }); + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}` + }); + } + if (factRows.length > 0) { + const values = factRows.map((row) => `('${randomUUID3()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', '${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', '${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', '${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertFactsSql = `INSERT INTO "${params.factsTable}" (id, path, filename, fact_id, subject_entity_id, subject_name, subject_type, predicate, 
object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql + }); + } + } + const linkRows = factRows.flatMap((row) => [ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject" + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object" + } + ]); + if (linkRows.length > 0) { + const values = linkRows.map((row) => `('${randomUUID3()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', '${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, '${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`); + const insertLinksSql = `INSERT INTO "${params.linksTable}" (id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql + }); + } + } + return { + facts: factRows.length, + entities: entityMap.size, + links: linkRows.length + }; +} + // dist/src/hooks/codex/wiki-worker.js var cfg = JSON.parse(readFileSync2(process.argv[2], 
"utf-8")); var tmpDir = cfg.tmpDir; @@ -265,6 +786,73 @@ async function main() { text }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate + }); + const graphRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + graphPrompt + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + graph + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e) { + wlog(`graph update failed: ${e.message}`); + } + try { + const factPrompt = buildMemoryFactPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate + }); + const factsRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + factPrompt + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 12e4, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" } + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + extraction + }); + wlog(`facts 
updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e) { + wlog(`fact update failed: ${e.message}`); + } try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/package-lock.json b/package-lock.json index 7ec599d..6c1e471 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "hivemind", "version": "0.6.37", "dependencies": { + "@huggingface/transformers": "^4.1.0", "deeplake": "^0.3.30", "just-bash": "^2.14.0", "yargs-parser": "^22.0.0" @@ -1080,6 +1081,16 @@ "tslib": "^2.4.0" } }, + "node_modules/@emnapi/runtime": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", + "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.28.0", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.0.tgz", @@ -1522,12 +1533,39 @@ "node": ">=18" } }, + "node_modules/@huggingface/jinja": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.7.tgz", + "integrity": "sha512-OosMEbF/R6zkKNNzqhI7kvKYCpo1F0UeIv46/h4D4UjVEKKd6k3TiV8sgu6fkreX4lbBiRI+lZG8UnXnqVQmEQ==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@huggingface/tokenizers": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@huggingface/tokenizers/-/tokenizers-0.1.3.tgz", + "integrity": "sha512-8rF/RRT10u+kn7YuUbUg0OF30K8rjTc78aHpxT+qJ1uWSqxT1MHi8+9ltwYfkFYJzT/oS+qw3JVfHtNMGAdqyA==", + "license": "Apache-2.0" + }, + "node_modules/@huggingface/transformers": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-4.1.0.tgz", + "integrity": 
"sha512-WiMf9eyvF6V2pj4gs12A7GQV3svyFIBtB/W+Hn5lT5E5DyqWUno1ZrWoAfJv69X1RNv/0GoOo6DFmL6NOYd+rg==", + "license": "Apache-2.0", + "dependencies": { + "@huggingface/jinja": "^0.5.6", + "@huggingface/tokenizers": "^0.1.3", + "onnxruntime-node": "1.24.3", + "onnxruntime-web": "1.26.0-dev.20260410-5e55544225", + "sharp": "^0.34.5" + } + }, "node_modules/@img/colour": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz", "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==", "license": "MIT", - "optional": true, "engines": { "node": ">=18" } @@ -2108,6 +2146,70 @@ "url": "https://github.com/sponsors/Boshen" } }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": 
"https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "license": "BSD-3-Clause" + }, "node_modules/@rolldown/binding-android-arm64": { "version": "1.0.0-rc.13", "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.13.tgz", @@ -3221,9 +3323,7 @@ "version": "25.5.2", "resolved": 
"https://registry.npmjs.org/@types/node/-/node-25.5.2.tgz", "integrity": "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg==", - "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.18.0" } @@ -3241,7 +3341,6 @@ "integrity": "sha512-/MBdrkA8t6hbdCWFKs09dPik774xvs4Z6L4bycdCxYNLHM8oZuRyosumQMG19LUlBsB6GeVpL1q4kFFazvyKGA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@bcoe/v8-coverage": "^1.0.2", "@vitest/utils": "4.1.3", @@ -3380,6 +3479,15 @@ "url": "https://opencollective.com/vitest" } }, + "node_modules/adm-zip": { + "version": "0.5.17", + "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz", + "integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==", + "license": "MIT", + "engines": { + "node": ">=12.0" + } + }, "node_modules/amdefine": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/amdefine/-/amdefine-1.0.1.tgz", @@ -3495,6 +3603,13 @@ "readable-stream": "^3.4.0" } }, + "node_modules/boolean": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", + "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==", + "deprecated": "Package no longer supported. 
Contact Support at https://www.npmjs.com/support for more info.", + "license": "MIT" + }, "node_modules/bowser": { "version": "2.14.1", "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.14.1.tgz", @@ -3686,16 +3801,55 @@ "sharp": "^0.34.5" } }, + "node_modules/define-data-property": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", + "license": "MIT", + "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/define-properties": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", + "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", + "license": "MIT", + "dependencies": { + "define-data-property": "^1.0.1", + "has-property-descriptors": "^1.0.0", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", - "devOptional": true, "license": "Apache-2.0", "engines": { "node": ">=8" } }, + "node_modules/detect-node": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", + "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", + "license": "MIT" + }, "node_modules/diff": { "version": "8.0.4", "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.4.tgz", @@ -3735,6 
+3889,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-module-lexer": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-2.0.0.tgz", @@ -3742,6 +3914,12 @@ "dev": true, "license": "MIT" }, + "node_modules/es6-error": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz", + "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==", + "license": "MIT" + }, "node_modules/esbuild": { "version": "0.28.0", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.28.0.tgz", @@ -3749,7 +3927,6 @@ "dev": true, "hasInstallScript": true, "license": "MIT", - "peer": true, "bin": { "esbuild": "bin/esbuild" }, @@ -3785,6 +3962,18 @@ "@esbuild/win32-x64": "0.28.0" } }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/estree-walker": { "version": "3.0.3", "resolved": 
"https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", @@ -3893,6 +4082,12 @@ "url": "https://github.com/sindresorhus/file-type?sponsor=1" } }, + "node_modules/flatbuffers": { + "version": "25.9.23", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz", + "integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==", + "license": "Apache-2.0" + }, "node_modules/fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -3948,12 +4143,63 @@ "license": "MIT", "optional": true }, + "node_modules/global-agent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz", + "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==", + "license": "BSD-3-Clause", + "dependencies": { + "boolean": "^3.0.1", + "es6-error": "^4.1.1", + "matcher": "^3.0.0", + "roarr": "^2.15.3", + "semver": "^7.3.2", + "serialize-error": "^7.0.1" + }, + "engines": { + "node": ">=10.0" + } + }, + "node_modules/globalthis": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", + "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", + "license": "MIT", + "dependencies": { + "define-properties": "^1.2.1", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/graceful-readlink": { "version": "1.0.1", 
"resolved": "https://registry.npmjs.org/graceful-readlink/-/graceful-readlink-1.0.1.tgz", "integrity": "sha512-8tLu60LgxF6XpdbK8OW3FA+IfTNBn1ZHGHKF4KQbEeSkajYw5PlYJcKluntgegDPTg8UkHjpet1T82vk6TQ68w==", "license": "MIT" }, + "node_modules/guid-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", + "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", + "license": "ISC" + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -3964,6 +4210,18 @@ "node": ">=8" } }, + "node_modules/has-property-descriptors": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", + "license": "MIT", + "dependencies": { + "es-define-property": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -4085,6 +4343,12 @@ "dev": true, "license": "MIT" }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", + "license": "ISC" + }, "node_modules/just-bash": { "version": "2.14.0", "resolved": "https://registry.npmjs.org/just-bash/-/just-bash-2.14.0.tgz", @@ -4466,6 +4730,12 @@ "url": "https://github.com/chalk/slice-ansi?sponsor=1" } }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": 
"sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "license": "Apache-2.0" + }, "node_modules/magic-string": { "version": "0.30.21", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", @@ -4504,6 +4774,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/matcher": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", + "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==", + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/mimic-function": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz", @@ -4660,6 +4942,15 @@ "url": "https://github.com/sponsors/oorabona" } }, + "node_modules/object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/obug": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz", @@ -4697,6 +4988,49 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/onnxruntime-common": { + "version": "1.24.3", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.24.3.tgz", + "integrity": "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA==", + "license": "MIT" + }, + "node_modules/onnxruntime-node": { + "version": "1.24.3", + "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.24.3.tgz", + "integrity": "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg==", + "hasInstallScript": true, + 
"license": "MIT", + "os": [ + "win32", + "darwin", + "linux" + ], + "dependencies": { + "adm-zip": "^0.5.16", + "global-agent": "^3.0.0", + "onnxruntime-common": "1.24.3" + } + }, + "node_modules/onnxruntime-web": { + "version": "1.26.0-dev.20260410-5e55544225", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.26.0-dev.20260410-5e55544225.tgz", + "integrity": "sha512-hHd9n8DzIfGSAjM4Dvslesc8i6h9HEEcl8qt7X3LfhUxMgls6FBJ32j2xrDtJjKJFEehFeJmyB/pvad1I8KS8w==", + "license": "MIT", + "dependencies": { + "flatbuffers": "^25.1.24", + "guid-typescript": "^1.0.9", + "long": "^5.2.3", + "onnxruntime-common": "1.24.0-dev.20251116-b39e144322", + "platform": "^1.3.6", + "protobufjs": "^7.2.4" + } + }, + "node_modules/onnxruntime-web/node_modules/onnxruntime-common": { + "version": "1.24.0-dev.20251116-b39e144322", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.24.0-dev.20251116-b39e144322.tgz", + "integrity": "sha512-BOoomdHYmNRL5r4iQ4bMvsl2t0/hzVQ3OM3PHD0gxeXu1PmggqBv3puZicEUVOA3AtHHYmqZtjMj9FOfGrATTw==", + "license": "MIT" + }, "node_modules/papaparse": { "version": "5.5.3", "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.3.tgz", @@ -4725,6 +5059,34 @@ "dev": true, "license": "MIT" }, + "node_modules/pg": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.20.0.tgz", + "integrity": "sha512-ldhMxz2r8fl/6QkXnBD3CR9/xg694oT6DZQ2s6c/RI28OjtSOpxnPrUCGOBJ46RCUxcWdx3p6kw/xnDHjKvaRA==", + "license": "MIT", + "optional": true, + "dependencies": { + "pg-connection-string": "^2.12.0", + "pg-pool": "^3.13.0", + "pg-protocol": "^1.13.0", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.3.0" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, "node_modules/pg-cloudflare": { "version": "1.3.0", "resolved": 
"https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.3.0.tgz", @@ -4813,6 +5175,12 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/platform": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", + "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", + "license": "MIT" + }, "node_modules/postcss": { "version": "8.5.8", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", @@ -4913,6 +5281,30 @@ "node": ">=10" } }, + "node_modules/protobufjs": { + "version": "7.5.5", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.5.tgz", + "integrity": "sha512-3wY1AxV+VBNW8Yypfd1yQY9pXnqTAN+KwQxL8iYm3/BjKYMNg4i0owhEe26PWDOMaIrzeeF98Lqd5NGz4omiIg==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/pump": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", @@ -5027,6 +5419,23 @@ "dev": true, "license": "MIT" }, + "node_modules/roarr": { + "version": "2.15.4", + "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz", + "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==", + "license": "BSD-3-Clause", + "dependencies": { + "boolean": "^3.0.1", + "detect-node": "^2.0.4", + "globalthis": "^1.0.1", + "json-stringify-safe": "^5.0.1", + "semver-compare": "^1.0.0", + "sprintf-js": "^1.1.2" + }, + "engines": { + "node": ">=8.0" + } + }, 
"node_modules/rolldown": { "version": "1.0.0-rc.13", "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.13.tgz", @@ -5086,7 +5495,6 @@ "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", - "devOptional": true, "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -5095,13 +5503,33 @@ "node": ">=10" } }, + "node_modules/semver-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", + "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==", + "license": "MIT" + }, + "node_modules/serialize-error": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz", + "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==", + "license": "MIT", + "dependencies": { + "type-fest": "^0.13.1" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/sharp": { "version": "0.34.5", "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", "hasInstallScript": true, "license": "Apache-2.0", - "optional": true, "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", @@ -6015,6 +6443,18 @@ "npm": ">=9" } }, + "node_modules/type-fest": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", + "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==", + "license": "(MIT OR CC0-1.0)", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, 
"node_modules/typescript": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.2.tgz", @@ -6045,7 +6485,6 @@ "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", - "dev": true, "license": "MIT" }, "node_modules/util-deprecate": { @@ -6061,7 +6500,6 @@ "integrity": "sha512-P1PbweD+2/udplnThz3btF4cf6AgPky7kk23RtHUkJIU5BIxwPprhRGmOAHs6FTI7UiGbTNrgNP6jSYD6JaRnw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", @@ -6140,7 +6578,6 @@ "integrity": "sha512-DBc4Tx0MPNsqb9isoyOq00lHftVx/KIU44QOm2q59npZyLUkENn8TMFsuzuO+4U2FUa9rgbbPt3udrP25GcjXw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@vitest/expect": "4.1.3", "@vitest/mocker": "4.1.3", diff --git a/package.json b/package.json index 81f2b6c..e4bb544 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,10 @@ "build": "tsc && node esbuild.config.mjs", "bundle": "node esbuild.config.mjs", "dev": "tsc --watch", + "embeddings:backfill": "node --import tsx src/tools/backfill-harrier-embeddings.ts", + "embeddings:embed:python": "UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py embed", + "embeddings:run:python": "UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py run", + "embeddings:upload:python": "UV_CACHE_DIR=.uv-cache uv run python scripts/backfill_harrier_embeddings.py upload", "shell": "tsx src/shell/deeplake-shell.ts", "test": "vitest run", "typecheck": "tsc --noEmit", @@ -23,6 +27,7 @@ "*.md": [] }, "dependencies": { + "@huggingface/transformers": "^4.1.0", "deeplake": "^0.3.30", "just-bash": "^2.14.0", "yargs-parser": "^22.0.0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bc2bb47 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[project] 
+name = "hivemind-harrier-backfill" +version = "0.1.0" +description = "Local Harrier embedding backfill helpers for Deeplake tables" +requires-python = ">=3.11" +dependencies = [ + "numpy>=1.26", + "safetensors>=0.4", + "torch>=2.4", + "transformers>=4.57", +] + +[tool.uv] +package = false diff --git a/scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc b/scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4d1d656a257b1bab68e50e3afe5908bf40cee31 GIT binary patch literal 60971 zcmcG%3w%^ZdMDcN_fu-A^@Ii?gai_A#2X7qfOrUugfSAhEp%FhyU?w2bF&_JHbC*ZWj`_0?D3_tjVb!RfSV@Z5d$cj21%G@8GmAJStKJ=#ZcgGO^h z!)Z8eMANVB&}zxAi|9IZ?5*$6v$vtcz~06VBYT@VOzds$FtfL%!-BUyV(qte*jO2c zh`rz5VefZzIM{Dv#M$raa529r;_ml!c$nWD@%HC+)^`puEM(EM?liX)?;7q57r=Wp_Z+tj?=@VITaI@v_dHjD zcOBQot-yONcb2QfdtJz;)9lx9D__xYRqyJ=UVk?`5FP8eZf+G)ZD47tk)|fJ@m;+b z-LZ*PV+$AJ>hP}TdbqWCZ{>Qqb$D;%&T;GU-p+-&4S4TByEfvz z6YouU@8bHn&3NzT&U0Jv-or(>dc2?F`nj!mH*f>oHoW(8gWPt!8@VBF2i{G1@5H;A zdx6`9_dag-E1Hh|+#a|GIG%e7VJ%#gYry*;YxiEXyHRfUA>?a9++i-pHRFAR8|L=m z-O63y_Tzn&yT~2D`xtkLYr*?z?lN}}@8evYJA`+eRg?Q4_ab)~@$I=|tr{-D9YOqw zMdL@fR>VKUeTh4Y_sK=my~G_u{Id(=xl!(Egr8a%uKvw^nLCa&9o)-Y8{VDVSGab( zPjjzuC-6SQ{Q~z4-p_@C+{yE+G#cKCIrKdKo|7O&g6x;Imvz;@_z?j}wKik#8WHy=CPaH#pQsCapQ zxSJo04)(;>p6lZIaEPxRj7DoOtgqcrzpg*-KNpJ)MYpeA+cgxf<3gdKNZ0vLU03)c z3qItdr{IJ&qZ*B>@~2NUA3qgrZ#mL@^hA5`NSmti$D)ku*Wu|!DH_dF*Pi-_=8yPG z^I*?z!rCAG9`M~qpOEw4$@wWcpTemw%$U0e`}?~FxQw+Y90>)7&_5XibIovLA$}lZ z8sZ20hhiCLbogvnZ)hMEj9nfIWsK-C{&Ge?%tta#3`FSc;NW?&0NY@M3r1snc%V09 z8wg#L{HSaoh5|Ci==pFYk}-7gz0r&*))hV9!evZR4CZh&V@5rFq3&45&V{1ge0YdP zIHQXVX3YJeXcSGLBxp5ixA%8l3i6@oa3mJpU50PX7`g|!P{trO@$BVTD4H=2i&pUb#eJxxlBJOhJAhhsy-vF1xd=wFoS%{FB)av~aP92`cU_%DQbREY0EueT0! 
zy`k1n2B7v9O(fWpaHOHA02?_h9)M3Yx=T?UJih#FH1hC!xEa`01@$~PMY3Y z&1Frst~H~zRqHeQ=w*OLG{&LdGRBL1IED$#uSWhTH6rlYQ)|&?KDw6cx)6@7-H*R> z;R~Vu@Bp_~gqrBuvt8Zi0oIXVc2)*M{bxfQ2e6FR4PDNdLPG%282>cNJ3|GRz!}xd z8O&cj_~nDM24B+Pn=p@Gp45#$_km%>BgAUD$&ntp2i`^qdO+@-Ts;t@)a>aqNj~K* z^+BIi36p%B_O{$deRe5Sc}r+AXI?#Lkd~n~O|`L=r@;Zrvx_*30Z-htdfmG1>(*to z9n~hDMk=Ep7`(_+J2Amy-Gm?+FlVe`pt(W5zbkf2!yiXHe~c2Dkvxbfz7xN?@E6?y z2YqSJzqa$r&YPC6JKk`l?3J_j+N8bq&N9JXo3ihmweL&X_oeIyX6?t4_G7}+r-aVu zQud%=2(q$sgWEootnTcDCME-%^r^fjkrU*ku4)6|wP|hv*ffAOTR0jXz|WXUaB=ZN9eS%8u7| z35M-=bZ|cBQ}%w5PthebZJLB;RC`$)%MwQ#{FWf5O~VuLR_pj6Ui|aqoQ0!qw5cmf zIGFE2c$nfU;GoTB?|9|S{26n3+U*;2q|J`ek+j=8XLpSazqVa46o^fidl#7u_+yj) z8F+3`vvdjcq*Vp{O4CMk2_2_B--H&yhmSzY3e{+>(C|hrLc4XR@QD{Sm-MGJ7qu7_ z5lD3zJ*H>I_6&_dGh3?lJWarirKc+rq1lbasf&+>xF~f@AP^A8D31jOhX$kJ7;r+6 zCV4*+ynw%GBb-r9_FMg9?uR6(He`=ZvU(9hkE!vi#kxwFWO5qziwZpe@DAYKMz=G4zX@J438REb76+upsJqr z%2-$HTR$@n$9ihF0y-_B0jyU*XMkZtUlgmPF*4YNwZO`Uy0~Br>lkn*VKp@{SSd}A zlJmoedXb{DFl={U+kR#HICo><`oL80_&};~OUhm^80sGf1T(U+FZ=`^0)#%i7e0M7 zEDIh^%f2iNR2cr86BVI3uq3CCqco{E2)uIPyqJt5`)H6BJP9plKnsl0MuRg^nVk8y zMM55EldjLbARB%vfX!AaYis*0T?S+l4_X3bLg-RB8Ve4dj~nV@{X-wo2Z9haVL>8V z!wTdW3!V>Mj`9Q({2)0)l*Yn(t!{8{(XqXp@bTEUZhw%K{!xD!z z5aXSNxXl{CjkZa1X0=8$qQ^)RBrkYk@+#cN7$Wnd6MD{c+bosGS=2D8)m*JL03CAd zpOl8+r9!hF)9@m;>ZdeX{JWe*ch!I+{vv9~XfG;Iu@BSR0Rj=^T(B$J9S$?7!RkDM zl>C<{xnXc9G{7IF-vki+%W$dP>ArqjHqA4 zUz8S-#eu-;yjFarczolef2?@MT0Qm55A*$FjcG^TwZm5q-}HUGNU2SS!;FDTK!p+rsg--H>}aWzsA_G**p(v$|xm+L^rUB8PNg_ z$V3$avky>+?zUd)TsELO4CppCp!!@$M_TtW;QFZcYGFc~iy)23pwLGkx!_6YCB!4W zG1xnD4Ys(>%^z+G6W6zD14vw~W++}n07m{*@)OyVF?9`LR5@`(qX@~4DE}HIxk^b4 z5!#H*A5LsGvN%1$zm7z|gum!nI2g@k6{Ah*$H&fBGS-yN_utrlef#S>$C^IO_oqEY zX@}?9p(}^7vw2ghXw?Uf>YrFO1*L2hA38J+&*-6_Ix(VIzc{47wx=PWe?MSssF^>c z)Jit9Tj6=qI>=$Tr9q9lVzh>`hXV!l8#-m5@G{bYmese$3p*QXJGyG)!82LUy4r2Q zGd1y|mH`lMVJ^`2^x;65@GtG5`0W8KlsX=}x1Qlh1T<{-Ii8j+%r(P67dDqFI?oz? 
zArfR+`8SYc0)NqJI7HJriKZ2b)=zJmv2UGo6$zyqr<B!_?!5Pz5-`d^T6V~=Dgw@=O*^0EEPYp zdan4#^T)ZAwN&&MPUNMmWk0g!T`B%n`*$kuYVWT9qt&U+`|d5jclKUUa`V$O?a$7f zdQJ#-3EiR8sh(7OZ*p_5aDG7O8@#`HFqJ=)+&nmC9dDV4T|X*p9!%yBrK~SJv>J`3 zpM7l6_+HRPi3ECogKe+gG7nM8>*YTnUP6>WfanCGi4wZ=_4taigihI!a&o5=i!JJ! z7bov78T%~{$T0`ta5DM>+S`&ljS5s`&`!vuOT#X`QBH?J18U5eYYk4wEe4ms^rjv& z-rPz%f}Xgc7H&qnE^Y#ouHjVYXSy|kxc$ub?LDA=Vw*b8R2woz=I5`YhcX82UU^W+ z7<6#EG{jlGl5sRQ!`N@&zdqwF)uKC|qz$UZCiF59Z@8B^onKu@@rZ7Vaz z7%{Cfra`_Js4-)qH{s2Ul{i(R*Ku)>KOe;o8QU(f+Q9M*5{J;yAL<|EF9%~?XF-s- zqF}ZW-&XwHit(}pS^i)!V+#iR2f1NzBZC=7F!;i7SA;6TFLyAg;`s)Hd*K4J!dbe#eXEk^Dfd?vTzSATD9oN zt+Pm#%7mUOl1Cvoj1{ZW)0gBfdl^FV5#5(`7qt?*t2)pcx5{FnxThNod?Hx8 zBGEd=q>(oYanHUkNC~(=Y%st$T!GJY+XESwEVF7qdZ@WI^J{^-~%#lInmMM z`$?STmQ!T|jKJnCT)g#27^r!mH_((7R0Y%x3LGCE2s|&zumX`m$S$4_4Dx|!C>D^b zs0oAyE(9)sGgp@}qu5qjBP_H=FrVQ>Tl4XZd3fMFxW)sS0y*EwqsI@m9cyT8rUn+s z4V)Kjm)94PMTdVL1EJdpRYS??YS>0dkw@F;8 zGJd(tw&u3BmZPn0!FHCwFBiQa!Xhne)fU$4{J%rj^M63j9df=+&L5KVN9245PPLOK zte-KQ8$=(O7!N6`?Ed%|MY<2R9c^s_tDY0tpcz}+b>}RH}w7R`(feO$<)4QQH#@a?c|k{Lcz+Ev+BOHb86#z z^|$JUjZLZ5&G>n*;4h1RP%N}{q*^*h_kC#hykVmKT5&k#>Juz|Kel_*uKcvSAnh%r z@0i$@@>bsWuAM4O=lO3`U$34xn98fVpVxYO!+YCrZGUg~t=)H4rdBr~+cyNqJV^_r#{6?_IogQK)Z8tv!gJ_dlcNkTT}jR_x=5M+P-`8+b4zQ=Tf_Z_$hSv zf8w<|Ob<0yqv_L6@--gcth*xVt^jkJ#wV{#Sh{xFHkG)u;ZFCR6GC}oDzE8Yh2S{y zuV8v3#b;4kKL5AM{W~?^-Sl9AIv8Nqm2j|pR!8rm=v?v~L%N~GfPr+sve6`ue@f`l*P1D0a8 zdMSk7$_5N+v#dz4l=b)}rv^!)34+KJACMiIpiDAmMWbuM0%Qo?#4{O{h)x)uA?yO7 zFSH#7hhxF0C>F+77^0y_k2t>>?vWGo{|z}Fo9-aQ!vo=1Fc|l#KEF;1{cFmz0nVuA zp+{3#I%c?POZ%7H_|o++O?IdJHTdmF7nb2=PnWG4Gt5|u(@X2d3?Ep^#J!CC?I;7p zvDl9#_|7lD9@2*8O<^wn9Vc%!u?@~CK)_ZKJ)VoZhZw_~h7mo7UeCq!#WJqrOo$_Z zUhr_{oB9!GyNwtX0ze@K=QD`0O}~dNfhm?R71HOEe99Yep-UJz>uuY+S$G&RV}qdN zDOA!*zJwXAlL;#+RHM0VmpC=xlv)&)KwpWHM)D;redUs0c}ts=zNJc-6JG)NLef)lG)(*+7h`_$^^We(w?w`$K>eN^@4wNrWifyh*d~s$0{XX z!j^DwuG?-&*dx_=U3ayV@uEZCF*X!yE>NrC0_n7w>uY`qlUN0?y26Wb&OU~6HDgM^M_c1t}+;t$Le z=a-!0+N-|^9Oq;tF@9gvs``uY_JRlf)jHx-eQhCHzjZ+({G^#G)rK$pU)TD-UC{dN 
zQd`i{u3os81BYN*d;7ah_jiwyS+9Bo8yJl{)TyhNDpE1jOM>!{xL7hOoYqj0JpUc|GWH`4&jyb- zw?pNxE#m^iDiS^my|NdWQlR;`fKJ7cA9WaJJbWO$lonL?`d6zwa5ZS4*1C)$FI&^T;^1{*0a8lw?x!!mh$8`_$K zP(*Av6>Mv6#Acu^W5F(m6_zof4J7<`w2N!MSjF=wz1q$H6IG%IM>AtMJIGz;35D~Y zP@oN}lChxJG&AY?k&>StA59+>9pb-2DSt}NYvlYAIe!BuCWZtT&{OL`1{le2HXY!sMYka6H zcGk=lRLmBvP8O`5YP{o{-F76o?Z^iOt-|qV=9Vn~dfyv;Q@;0#Zxz2=I$OOXS-s=V z{#12SYDx3>eu%{ik7^&3Ec^QEH&#!EQzh%}mu#8dJ8mYL=*8g>4T z!;D7cK+#d{r*rvBXY;F*`PCn5EY6yApyD0NTb6IwW&`V! zf%Vf>cdLcK`c&ZXxO1+2ZMv{_s{6fjx6Vys zXxR58SMRy27iyaBHVD3bN?ZNQ#Ofbfk^D0z>-C*!m0RHdUg?tN z3e)$t>EQn}zp*)B{j)L$`B&v1v>E?wN5i)LyNuu8t|xa_A%gDZH)vYQj6W#WlUwFM z&<|FXA2b^O+-QWqwb~&P($52iMQp-+jHG5hhFqAB;fUWuI+~wQ%N)oUT1gApz&i=Q zu{1vTKJ$^Ns*{jLC$Qz30CbbjoB^3Xe?OD~*FoISOKQPcM8N+u>JXQQ4)Pr*=NLI3 zNzNQR(Zg_F(L8i%mT!CLbQsq@aOXWV;*D|7D|l!opGD*KJ+zX~rtz0Nw3E-FDOt+W zBd5*v$c?a1%BqY_TCpOkqg&p<;D;>aoWKt?Nw3IrNb3S*FbJ;Gzyc!%kj+^_5E~+y zS2kj@l5UW?Y?Me3`G0v3#oxD9R=fkDaSI1p30UbEk$GY@wPc{$;6J7w+f1DV6k|Kb<%RBWx_cbnmj42 zIdHG=o<%rzTxfenIDS$neD(v!sh^lN<;y<>yT!AdcKzND9Uj`-dp~1%e6nPh?vJev zR^$5?E!^BAi~Y-@BdZ!og$Oe!p%^ z&&VLclrX?=ep8lXh+;%}gNjObB;i-`w+T*H@;Q%+79 zIXLu#rI=-BaPruGyf6zHvhK227rzo2DRYz*_|(ub=PVq5G3Bfn-AB;TK3OwaHudz> zi&OnV>CV~GretZ;-L`ubsnXVz_vq-shdQItnYP%kS+7{ru9~UsQ){Ms?wq)@UuZln zoaq)$a{|{RIC}p@ud~`d{luhkEFo1$=Z6*tDMC6wi+WN2w@VueH18K!8UKs3yTPj8T_>j?@-TmxQK1h!Zr0bfo(rFNu2**xHNj(ydhG$t5dF8!C5!ee`o)lZFlzz_QQhVu-Nwrs<;B-VMh|>w&mX@uslEQ<)QY6Pj>VFieu1=KT$OTH z2$qUZ=j`rT`_iO+DJ@9_NcSpRlnTT#d(YQk(7bObXeiUaU#f*WZ_H>Ou`#2iMLlM@ zRP;&X6mu(!0Y7i#M9gJgMGA>$s^j|L>GeQZzlu7r3m-m98u}Np=e~geHD27pU8+8+HpsAaj*S1{QGF~&$Gs&gwt7q-&llJu~`$oaA@o}^wf0nq`DwBJ$ z&!SaV`Y^Hamf)&RC{>=Rp4VUW?chIaA3X)nVylHB<;X?aNzjdIaK_^x%7HY<*vk(N z58;p=6kq8qrjhAos{6*WGt$+6mh0LbFYAUR83ZzVNwx51W1poiD?p*6nyC}-b>8Z{ z<9+wJ+1iF=ZNp4$BkL&uER6)e6GMM!IQV$i z%UhqD$f<^?d006ML)hwQl`V>l8{CQ#P(qvKKC9+5c;vbmII7}crwR=HH%MBpjcrRz_BOJWoQK%5CVV~bxt-7^*;}l10zDKtvsPIaaE%gj4f>$Z~M`s zI40bB4Ci(CwKN}YYGbO03J+4G>x^KqbzKx$Yu`k*89h#fi1a2+T(%_?v8Pu&Wf!D# 
zrl9!IB)%23b>c5d8!!N+Yso}3<*FFnPl&YlW^Bg3V$Pj6UjHk*CeEhZ%LL0Zrl|NM zcVJAPw!6lAlXj+u=1YcyCzf zJ~z`87MAo01?NAoM4+nZS@y`L@oX21t9o^}q~X{&v3XKASwFS$t(}u~LhqzP1xZ6d04{6xxLDj1%=z2d!^TQ$=s?`@u`c^bdPX(Ueyd>Mf^erq578uo3A zOe4Jtg!NCDM*7F65y(*;L>r_pM3Z*GqiDKf8X0&K_L#JW8xq44&hgDUAHAg#OJY$hE^jJt_Q$n8G@x!H4%V?yaehpIh}m_q_I>Dz!7D-(7u zpXptnNjM%r|FBS@9v}fXm&cX9%m}LV{8hk4X zh+A!xbBQCV>%0Jcgv=@rSmN8MDUkIA!*zd5W{7rh-krsH07ZW z$Y^NtWNl;cL^psf9gampOav%WJt9S;XT;FH)^?D`3DnPDfm7|uMdvUHC%+bj@xM(e z*^d5Y@)6rAV?+=1M>Bbh7&_kEb_`0BkXs%yq7+GDN&mX47xFpLLB6TBL)L<-7 zkvt+8;7?FZ)JI~+IH+T=qZCiMioc+k$ZU!@#Qq4f(;fT~%Ggb9GEkc`797%r0GRA( zFk(tB#Be?Yv8R!3zcNO)1&9)UVWil6MTm{$8UpYTf zopP-J#fTEBZf=}0FHO4($6OEcOK$AAzGLD-Du3nZVTxSywqeFxnf6!A`m2)uswr#A zzhTTlgl6RJ#u@vnbV1>GA{%F$-kb8*3x!+n=; z>XJ+9rs8)tq?YU!%J-LGplqWD| z6nO^iAJ~`UB*%F8zI*9UEShC2ADInhd7w6zt(-7?V$_sZO~!7w&6KUb-jpsb|9a6I zMUz#j;+p%#p(&1}pwq+e4&ul6d?cs5*CYhFnTF6$^xA?R?Snw&L$k(P3@$24bZB0{ z&MH-UAvrWGoQu4Da4Pn#wwV>%-e~$x?7Q)A$M3Cxg6GVh?qpfFFf=@F5lS!6naXX~ zw_!-~SKfE5eCXGBHhxl~DX5%$F6G-adL&(1j`Ns>;$OunY0Nj8lN+0br%wo6cxF}K%m6<- za49)(Nx1x_hhT^6j_UAwPS>r2q<;{+i;pp(IVhYK5$h}Z`q8mwFiR$!Nq1$I9zSz; zb8>TwaN?A3u79RxV1~ai%O{e20>yj@s;YP$)wR*|jcU(Pp{vjRFDo?dD;_h9 zwY_S6Sg!G}{7FEwY3tQZqIrU;f*Jc-!O(c;88|=txE>?*vyUB`vWwa%?F0X8dqd$7 zt?B#zbw^C5c{~i-$zpzLAED{kTF`;h+@c6<0TlAnpjzm2r#DJul=KFvXi_+}i_{`- z7#4|_OI7g%a@Q}5+NAokTOOIQ1M(Rg7YMuJrcT;XZs(r|vdU=p#I0u_e8TP$2MKJ- zUNdgTiDFU70})OZAq7CZMS3o5JIe4LGb;kg#a9rj=G%DGm=Z#g2tSBQrwB`a4M}mr z(&icWGyK&#Rgv=637)kNHQIG~b9oh$``fpt zeQWjfV5(-{J@a4Me_$8dpHCg=n&}P;eJ{-R4JZ4C0j3wVdv%WpB{$NG(>4*ba@xZL z!UuGRDX{Pe{U~VFJv8d9m@xXBLK0$}GLZRoxFX0fW-OTJV9|o%!s!^bqNg*LB2yhq zT70AUiAp?9a#6t)|F#-vGj?#6%c!DZ^D76JcbuFM{NS<_(Oq za5zsj*bDVD0@8gTbV+K?2;Ij;%cL#<5>Wf-@8Nker<#x)~xoy|t)40@o& zfFt+x61NC4uRUXmFcDF;kzWUNMfSCD_L>eJaX9%z8kB|@bV1p%4iyj7#WFz;Q*2bc zS{CX;M}|@TiWFde(m$p}=F9ka$O%2Z8`kSqp=52!vras*z4z4}pwh;0A~^3_-<3Y0 zcx}qHPGo8>e|6i0ej@bib}@lFe|+EgiLnH<5#epTSs^Uhlq%Vr^lcV=TLgFgm|@QC 
zy>{Wsh4JuY;na$0->ucN)w`0_yHeGAQtqb&%TtWM$*)CIM0;u}tjtAFQnr_Y?JIeq4IbhWtn!*lw@(<69EtNiwC&@BTSGREcRedt!^!?86U zwWx%-Ov=mn;fy$7yF<1y?4fkD@H45sD1Z4igyD=_2}QBZpdK<_egug!dfF9_lFuQ2 zf7bqy8H&XH{SYw{v`hQ4EUWx6u;5=q0n_-4{x%$v73WWEO44R0Z_M@~Sm71pUDsDm zH2z903nAHJ{+tL56Zx+Vj_IKvWyu?__*!hnT=sU&RNH$eZ=IYznX29Oo!HFsroXU3 zqiVLTBiYs=bcRxGJu~MbGY9%5>14%Z7Zf|)WCh1fnwyUdR%3(qVWGxfI@(B^E3w?k z{CE6s`KJoMv2?oi{<6K;=e*&1`vTN7dvEnlhf{S=rK%cIMSBH*qu^?~qMx%80QzRE zOC~qIQ~y@|)Ru4T5w^74uV|SoD4l5h`u;cePaaN{txpwfz&CE(HBlsL{v`ct?_%qg z_LNNQoT!`9-P-+~l~B<1zwf_SoN8!GZEsJlJ|WaRBe+iHu4MZB{fY)j*Z|F2vu*T6 z)@?XYe=U3^{Mz}^{r}mKpSI@-g=>@cwX%JbBlj-O*iS;waLk4c&gj8UKWWjzevBfq z#mT$8S8b$5lrKtbzgDnsgXaI-P_W;o|GrfVcOLaFlVZQaNHK*xQOnm7+9(X=RqG+SEXeT6aPs}HuE>h)N@&)qAXN^?(fY&%X&TQFbn91GN#|Tigb;}p1Q#K@^ zs&oEW@$xS+CSqtJ%RXf0ITU24Bj`X183P^)L3aWMUid+n~e%Nh^?9CuIs&q8OUEvq-)gFOj=S;{kRp z3}X&vToP|IC=$yCdhxGQF^pQXDXor@s7YFVvwA-MMLv#y6D9m7{6&8kj>r?em~vH( z?*Gu92Q#B-M?Tpan=q#wfzjqHV{ZnGy)47;V9HY^GyD*Ks}h-h>?nTVEWLSh^5i?6 zZ*@+Wrz-cP%AXP(4QVA*Sid(_*_bMC5**DBJcV3V3kSZ$B=Hm7N{84LEZiqX~|dJ2fjE1p;ZmZ+P! 
zazA$D!&uLO>jx%IrF>N>2TnfdoyBwR!tvOROV=+=no|C%lzY|3dW8O14h0*uI_0kU z*oaW*+-?!}6l)DzJ zN~`l@8&=h{qh!{xEa_M_*)Z8Vd1~tHv~SuZEU*8-vGw6njkoY&K$9n)ao?Tt*5Hi3 zs4SZF?VTK+>H}=0e0x*&y@Fw{D3K=^mLAep$C9gk^cV2VlgP`eDNNrMOCD4)x@9s~z z4+|E?wy(OmX~rB-MP_w5E()Q(nWpn8cSNv6(st+LV=S{qNxMZo#>#kdTGeu{$z`)k zA+V&y1q6)#iMixOMwLrVI^?cPKJ%f$N0fM=ytgRS-s%dn)WVv+dQRz z&wR@~?M$uOoeJy`JWu6L`_OK3x*q9)W*_Ehta*1Sa=i&7<68L?p?I!Ta;r=V+f!i?a%QEE z7HRck{Q;z)vO};)uE+K#$YpzCT{5dyYBh2>o|x+~njg7b2@_m*LI)QrBXGS51NJ&H z0+~1M^IAn($w9OeY&Wy(DMTeh#^oT&3j!93WM_Rut3Sr(3uO1%7NVDUt%kWxq-GiY z6}PuB@;JcAT~C%)mI>gvtsMmhSb-S>U0V@%un_5b3^NF75{_5fM1qh}inPoyk&U>X zh0!wp_sDskoJ|xAwyqyMh?+vdBbKXRwmAGlZXmsLNg5{`GK4xUbxo=N3DH)hEi#hFNeWD*R%v^SrQ zba;iltf&%=^MH>)Nq z-l=-4YHCetd3~y6tKiv|8}q5Kq_A#pYE@$@&?I=8)86ulxZtTtd;Jr>aaY>Abh1qF z)S--t1A?dWq1Q_I(hhuCMEJ7QMflPs;Y;+K{18Wj;Y(tJwfu6h}Oj} z#hwrJgkjgG4#&}qtelL!rGE&9>&QUW#18I34<&f|0n3Xz0`MG zkOha(aL6zJhn5nY>vgzBTR#27s&V+SYay;DY(DqkShd^64g~A0`LOkbh|i)PWPRCS z(TrJ_LtVIB|E6)dzFX`s(UrTG1UBmKW!OJ?c}pp-M~Oy4{igZvw>X^jMPBJruyw* z+X51cgUi9+#USzTlk**N*ytMhKZPF_HK@>Fk1EaIppzF0NXoN=E~bXDcagy$Rd5eQ z9VRGfL0*CaGeLpxN0wT_Qu`yzHUbCe0ZnY1>=)Q^l7)cLw>V%FlxyC!*6V-Au|fa) z#(I6XJeN?FjEL^y2Lxz1#-vEc=PkzSTu4g~hogd$Sc&#F~i&*ENAc;3tdJ66cFYA0}Y=e z91V6|fJBSX8~+yw!nAdWqa@n4r7Jp^b&&;!rofAd4n}Zi2^YE$2E!_aywB1Qwxh8m z$ncEsyKgQ@yNd)%(VW>emY8*~NV-={hNjwQ%p0Z?v-O9Q^@nHck0tAm&DNhu)}NWF z4?@`oA`sCW->kVZX|9|xSCQE}yGO*Dz>@MqEFOk6i4>sG%cUYCEdgXp7Nffg@S7)z zHF-zwQNxa#6mAG@EO>s-&>RqvyT(OBWrn}f5Twl!%IR-fupNLX6s6p>7l@l8?u2LH!uPW7Fv1` zZ#kPy881D73m}FF|HJ5KZ7oNan!&6lZo`2kdn3P#>iCeHzar;{uP@KpdkZ^@0N*O#We%fUPXAL5z&=F+s? zJAUHTJ!!H;QkBfF5*!3o4?JbFo)t;Y3OPJ2hsYi&3&@{YZ)MV3$NLJRXM;p-eKB ztyukg+y|W&E$sj4VG^J002(+8rzd-WFo$oz+jQAjZG$mY@s_`4cAZ#=@0M9o=qaj{ zO`m)4(DZ>7KlsE4Yn~>Ee$DwhvZGwXlyM8p9QwJktco>OcALQ6msl!2A~l3T!j z$%u~qq6y6&_DkL;lJ;BBF2_;JWF;W7k{6`?H+x`v5G(Efg?2N&3I^QOu8f7wNYNz; z8IO$2vUn@wR)w&;_-u4G8_bYiF#SF3%zThoBGKUBK;&}9+TGQC4oDBT-{^w_Lm3m! 
zJ<+Oa5#PZcSOsUs$7xANEUTHHBbc^b05fHfN8Wzizeja3C1Pw}{t1oe|8HtFOK23q z`fn-V@5uQta30&UFmo#Lq6H%?n?Jln6N0u(SWfda#mivl(3uXbBs+)Y>7^^^U6wAv z`>Hz~SSJP-FUL<8DHIs4TGE9jc-hlMWFpa-E?I(?TMA_66z%B}va0A#mjuQPzvLFd z8cmfS8Z!oA{~a#VPm!Y(&zGIKk>8hb${aB{kTBn_$U$>1l`^a-wJ;6J9A!xy4amM^ zIQ_z9(o+5TWy+8#FGE^|harWXMj0cz5pZ)LhxAHqrWu&aD7w^Qa8=~PAuO$Y{6i`M zCpS$JfdX|WV;^Dt1rO?=ZU2G?p=Aq#@Ka$;L&ip&pB!nz+7T<1sziQ`6S_TC&a@MG z$y#Mlmh=r&!3JbDCbmqf2})L3=;^DF!j!jS^qsTNhp5YgTACr3;#Tgj>(Pca3G3V1 zU)I8oqphz}sYCKfBLn*1)>kEkDsS{_wxvix*vTiwF}$ga3t=aDToM-ilII63KC1C8 z;Z3qx%O1sUVGNQ!@j**ANq*&xJEF6AS!xl=kq@=Rwn-^D)Rt?cq5o?~Or4ZYc_W6% zBRbl>;6V({S=zBJco5@yQ|5<}QJZRaYsP$FU+}=uBh5H&alE0mZQs%3N1Bhrd|X50 zfo5pqG8D%gqDAP|QQA>39GKD8@xv_eT$sbQH5dh2je9$@o6<9|>kSisTp(@;V8cYp zM=aZ}xM@uQXm%I>Wn``{=l?5S{6CUIsFeR}G@JiVa56TWmd%>xW?NC7b|Q?H<^P6! zL|X7ahLdsk;BwjE5G-WzUAWeW6Y(``_CHag=czbYRf^z*jUfcuhp(Yp{+{CKN>ctG z$RYgB6ISN`BYb3Ki5aW@ZxsAAf~)P~VnAwPY=gyrpM1&&j1};R^5SAw5b|u9afs{N z`HSqDS|j_#F0hgXppvNr19e#3w{X#*&~aKYh?M~f3L-b$3@O-`!*tzCZ@nZ`?ZxF; z4-mTh)!lOr*R_LJ4rXO|)l=>7J$LK58OP3f;llDQcMRWkecLtT*#Bc^@oUeG>E<>b zOKm)!v_Q3O?BaxVa@UW+Kr&9oW~+84t9GWmyT(j&h1FA))4tjD&B^u6cQ2$0TgU8a z-?CZX`lN3?-S~9m%8_*Wvf1(t$#OckTd?7gMqgkXvtobf#FcNefz8Rl=7)OSD%+R? 
zqDzN=V%AYHR=?C?Z+5;WhE6=O}mbo4>Z=4`;+l4X;tQ%mY*muyNd*)-jjTC(lV z^5l|TV@+3&rajf;)>%(=(o;RP;=S5iwMox`J3HZs2a;`9Y%m~}@&CFW^$l9;P+^d4?pQtt&5`0O9N-YMax3E7z${ z3BgK8i?nvh3ex;qp}DThRkQjo5!|#0LJ?Za+QnOeQ_A{W%B2aYO5~EmDtGW<+0MfX8TRf|#SpP9`h|BVWM-Bgxw3A@kD7`4posXt%`e` z&(b|%3`OF?=mFU2!~Tf~YzYfwt(^o}g$#S(=F!1OJg^WQEKKo-gc^PyB>^=+Nfn2j zFa_K`!5t7R0l0Ji(uwDOw_%E(YR1HzE#HzX-;yfddZ%@E=dt8YT$g)1xw9=<-Zooy zDp__aRo0pEpB{63X!ob{D}SqNDsM8B%wHoo)}%|TXG?37rM1(`gwonn>89xylBHY0 zKS&o;%@))q3u>pjk_GFar3a(;Yp$;WJ94xT17P}d zX9TevbD8xPXDH(2tq=Pe=E!j}doo8HaY+|Ea=yF;5%?*`*&mN1k|&ojaNJ*sOW!bi zvixbnEPXOjqm(1{GER{juEBO6yMAq%Vr7wTy!qn9s)R#uZCeFSUNbTp?aR>@pJ%{& zvqp;@aqj-*a2h+IX9Ob|R$r=oiR4R|`T~+)dDH!yM5;<-4{V~zbzVIexeFp1m)+gA zDg->?MsOUnaHQ3|Wd1zTHCO*JA~09Kd@jW1B7mh4905t2Ym?Gft@MKAOUMGrzSUBg z@|H@BZj9ARp-&)9b8K9)r0F`T1m(?0Q+a~{E|?{Ish@ttIAS0p=<~0^ygHq|1k)q3 z?jaTWXQ1%*jBY3fMF-lP(>1mtGKS(Ac!_n<8ETQ>(Q&h=8-d0Y$_TAR~S}mI5wt_A+jaBkERUkL`kWxh#!aQx`Z6YV>k| znWdJvZp5vldv5TX@zVle0N0781B)4nn=yhjF&O0;&B>T@|1Z8pq>dS#tWc|rcx7ye zGn9l8w2VW6ThxQSd>6;t@gW)SV=Gw3Dr>qEUoQrp3I9jbW0r7K{8(5djo`91+|ol5 zV^M0vI3Pq!LjmxgFmjMsKvEp#i5Sip+3Kb`E^QX=-(cNS`V9tfm{FWu+JA1AEXWOZ z4>QSDeCfhvZ$W}T!?#7LO{1C*-M%?*`Rhx@_NMa-$n@~U(;(B_Z@KRWHchV=$~NCQ z{@spmcZ_dM%Q)WHJh}*^u|lqUrPIm;7gY-eQ4Ks1CKN&uWiiu(52ba zr2X#sZ=e58|BUDGxBI_oo7PXCz@3nx8|SW{n}~h=#W!9|6|PQtSKsL$GbTNU(@rm) z+Maa=lFq;f&gJRCWw=}W{Ppw6!kWouIAi;9Jb4yJwTtVfF3zq!kX(Dd4U2^AzyIn%@x>WIjv4cM~Yg`qu*qc{2>s_AoE>C$^%zCSm z-l{3%bo(E4ezS9S^MT~%1A@0IA_orLhWwE?Ad|sfOp3uy&>NP z>yl8Mgk7M*G3U=76>Gdlv_En#7o5vKV+ZN~y?*8XJ*MxO8axM#`g=y>0hhTI#swHu zJ`ZpdH*d^Gm_XD*V6%g5=-5_JOB0*+n4iH?0v|{O$da;n8ifqxLKn(MS()F&lL{?Q z6q<-6n2*Ji4@jmI@t;|l)n$}+M$F6N%nOVzi?)^7M5Zap2gvE5l8=*fjGPBlGdUL# z8SO+6tSsvajmsXutg;br?3iFy*-SoI-OYb!C7(@GQ1sAFK8FTi<|Lm><6H93O+F8q zTK2;CXx%EK?NPhdYTWfn;Syt^STEH!Pj{KPzlAi!u4o*kkJ({xbyl+2v*Cv}>Lk9Vw z6_$PYRd}BC;nG@|^V3zDtcrq{q6dKeke!%gSnmvq6i7MAOX1AUtdY`*-Q}!2RoP$3 z)m9#;JW|<~(c@C~xaXWGI;bUqAsFBh9#I))>%hI(or#a 
zH09Vtn%C}e<7+#F;?+s}>gj@M^Yjbf@!u^Y1C=A<4brXXzXD$clROJFHf$UFKm|HZLi*u`Tfi^*1YG$S> z*KVEjC`x;r_jaB%>DXRHW^s#sMXm(DfG%Ha{Q35agSA7@lnF^2O@-2dZ5R={w#y_B zczWGkk$@;HiPjAaT^4ms)&yebLIZ)}A!1h1&4SW>4RzRi6S>LLl|nELgX=$_#Y2Wv zm>LT=4*?Mrup_gg_AtgZJH`l?A5Pl~>Hcqt#`p+)@vv{rmDb2pe zVqgn7usHyl>t*C7v5C6-IrZw|rFc=VY;cI#=#E#4HfV{BtDLxbEZM&hqMSmXf*oVA z&svILV}GLf{+bq{?O8BOW-O=VV4=8rdJ_xVstQ{@-Oj?ctHN@ntDSz4rE65B+bHa6 zWr0Ukf!l<}4i?y{3fwB}rRIEKIVB=v_2iMK;lIPT{Hpc_HbhszR8S$kMJA@A3a7AC z7M*k|gNuwU@uaD2QjDWvA{ytRId$vN_4Gia*dY3F_q@D?{sQ?_MhX>U6JH>oRjCgs z;K}W?|D5R^N_qw1e}Q@kiO3ADMMuN^g;IIg0+U@*7cDFQ3#Ia56rbF#f-h87;rvuU zfqocr08S&-g4!_0uS1&&cR`twkJV-kvC?$ZBD$j-U6*S4!Hc*Csusi*ShlsI zR0~~; zxqc6BRR- zB{Ke+D4SWkhvBrRu$+!BUVAea?a>N~WG+g>F(37EkSCK^xUU;E7{_ zP%j3x(>Q8{Fa^m^lusL2&}JoP$rMg*A=NfHh?bT$2>ZaY=X3CT1QX!@AAV;?;hu!C zVLfC4VPbCmEWfr^q!LwgiT^D$gx^L0w2PeW>rARh}9@j zT{x$qCa+-$dUznd8LT>yZl6zxi&xvJ8xW4lW!CY{)VIGS^*wkRG>7AUP&lj?>l(83 z(J?Z<&#Sm2X4;FOKKgZdAVP*nm?_bgU|UM@3m))c9BkEI5KccyoGaQBg5f1?E_G=+H@b7- zg9EojHL?ds_+)P4kfh82LXgkQQK zi;~XgTI7plIvtPqIGy72@ z!-46uS(lRSY&cYsD~6QMQ##tghXBvY0B_)z6N7O!^A%WKiN|Qb1{!YhR9pCcb@!g5f z9O_K1Kb>-)flVc|{j2dW$Hz}FjrRQUKCq-A`i7p&=%L3KKY7ZK?i39Z?4dKB9H+~^ z8IP_9R@wo!ac6(mK)44i3wE44ZX@(Ap6rg>Rd`jD;b{3&_!{*#(j{eQrYWNQ8N_d< zuU)~u_JPGW{!-GiGL3g4X{n(1NYb(bRscxzkDC(d}FG7!~jDwNr zL6JnqVTmxVoE0JKAV7@{JY-ke`QYV~!-K3?cI5&eLiM|-yAPpq23%hq`SOThE@Km& z?ySfCFLR#!@rDV*Y*BTxs2UvOBe#wui?-c)Ch3Q<*rw5zIlF5tCf;yAc?ze+1jAYp ztO%5(hXns@RQ?e7L!>JrL&BFq125!1ogmrLPHO z&xt~-PdR~(RN}exHMr=me0iXuP2nr5`8O0@x*(eE+5-EIA3cJ|5?q#dya`toVGd*(*XRi3=8mQW%+hB9xj$CM=XD z417bvoSOl!TI-$!v4%IHo-!{zt)~kf71ZJyjb3@qFq#HPeBnc;Mf-BFz!hC)z`TW7 zGqN(1nl=XivML1NABGqR{;RDS9oPt%xj5xa;P5&*WY>oOU2@(dhngoE?$9#0T@2>q zzFYtwH)p39e;c(xlOQ_*cIB7=pz`H2;4~rD1iEp1GgC<;2B>O|@V#h06yN0Ofm^IE zLHmK@&(VYllS3<^c(IHn7yM^D;eqbRFc$)x$GT8{mog(Vu4pKNlM~Wyqd8RC^29kJ zo!(}XPhV!Mg_2t5%#|@D=UTruQAXd-C1^;Y6pVL6qlQ2(m30apP{(Y(CztGT=ZmC`-cqZg#O7@eJg{GO;|V z$iyhcC!AnKW?PB7F`Of=geR&S*AA>p*c3Al3E9+G93?=(-vbkGe-EEmuDeimaKei= 
z%L@8Z|0o2=K!NI}3Q9}UbHO7KAS1XpB5J}dDHc|<{p!wGxs)`KCn1)R{Ftmimhkp1 zlTs*esPto&(<8k{d{4|HFQ{^!0OUAX%264Zz@U4U-6|-M z`w-!tgbl7YVTYTSaKO!fQ*IRyJ4|Qv)jZwV2~EP|LNh{pVF*%k>#4JP1b}6OuM$GbDV}bCBgbnVBg#C4G!V%jgE1$ZRIadX6}%UxiR7AK*;5smLqk zQVtZMtjDkoXHsJuIT3OQBZ03b`(;ao%r*t1AIO+s-1h&KcJ0A!U1eU^maO;FdRczSj^nqTI&t21 zTsMgmr?!(OP9I5}G_}$+X?GPUa$2 z%NRgPl><4W9gyS6iDflVy)DI=3xq9`vs|`GXI}BirIOL&)vriT5M{AkuD(;VlE?&} z1cG^zSzWeC%@&V@HtJ?{g7{=t8b(b8L#Nn7u$b;r-bhVe4p7Jmd0c=$N&?TzZUsU` zN05l;xHd?H6D592k|kSq$_S%`yf)SFM9H(0J42QDlT>-u0;2VKqGkBsQ4~Tj_;nag zrXXwCB^?Xsz{~D{_#fikvPYhJkRFg9BIx~#`VA{>+vf(-ZT%^I^W@<4*7&&ly4gd} z@okywOPB7vyeq4>*h4FUro@`WMDobIUpR7ZG4RB+J`-L)ZNKJ)HQ{${Zs0^;E{^Z| zk~w9nnXzR($%&=SL#fR}Y2SY6 zceJ~&S#;n>byd!~Q?A-+J6UlFZAwn0LtRt%u2ifM)v*b!JJWT$(iOX>23IOkQBvFn zUQJOy=F|{brP=oRZEv(C2j`D2bRG~6j|!(BUphUJIz6#)cp~Ep&-MZy7hGXvpRom( zY}F}S^#`G_q!-TnwaRp8-AYBRs(dI6ynvHYi8?~YP(al-B}-n_;u!ngnp^9Kh@Zu4NwSlHPw?4-kwy$o@^O~ zgeYeTtIC#BNQGL@N_wtB3`~}Rbg(H~O#xw6nQRUDYjx&eyfV9n{B^p@wv{yv3440Y zrc7lO{qu)19xyD2uXiBA5-u9Ay-i*UvtgtJ#sf76x(E;(>D6N|isx zdfM8XR9r^u(KJ$x8ki84cjC^n&GX=sP;f5OxGWy;zny*fdAK7SrT zqDJEtdRZbeMX)abQ4$@vU9FT*J$t>7ar#}l|}TK#?bBBC#0f{p(Q zWinKwy=(QyBxCe08Ou|~@TqqL8 zwvaFTj@*0aylK-$?qxfd0h9+~Y=!~~6&H0?q)qs?rgeh;DAg(@$< zRrYLERqt8@SITq;;z7*l><^cbly}~rs$x!JE?2-nT9`58?-*w!MSTS1$hZiz1&U>EQ!9F5nspyxu5Fa- zK`RZTUjVa^5lY~D-vYj8Q zc8;-ITy(~q5kJjn*c3MFVQ2NCVh{m{BrLd6e&sq1u3?QRc8GbrKdWFFYAKr8Xh)B+ z)v5Mf%#NJ~Ynv~M_+X1mS<@e(PYbW#V9bvBQ^rg!D7zy}Ensl}n$l<|Ahn9rN3cU1 z8seX4tw6h@orBW(N?{a=$R}rfo<~nX)f@vT#9Q#N$j}HzE!yHl2WHlE{O8G`Whlxr zK0}YpZUQj~L_H%!8aze_3&0fy6|mzjZr~W}0SPiPkxnMdsVZ8E7V$0MhcrL&#L1@^ z-*~iqw*&#K5=vlV{o!a;I5Ku>oKYdnmaPJ^NbifDZGnTEncG&QIk*TFd1R4p_igg^Usgz+a#i zM5&H;@fu&w0rVDSB?n9K$LX7ye^L*NM|B4)LOrV|P|M#FEtP`@*QjRSOI4HiKy_QZ zEbUkW)3a{x^aHO{KnHKIJihlU1FsDTO7=!U`3&ypi@2zZq z+HpWI9k>ccpZi|;M8;Vz^7$+}VJy}0@}A3krUzzgzuEXkV`9^s=U+lU3C%X9Yj-W$ zc4sOZrcBe|;tyqNYT>I$fe*E@Y04zH>r$q*E6p9>VGlTBHmrb8wJiZg%=OR~0tjGB 
zd>~cYBzT&IW)N20i>4hJXJE?tffE*Y%i_(Xu1=IWl}w1y@w#WGN0H{tz}7~w~LhEo2Qa{{(kTWgX#8rgtnc+&XdCE6T;X9;gP6t@;Sj3 z)6j<|9VBb(XX;;Vn`v9}w4^*OINWF4J}R(zra4ne%7;D8utibYn$sCdEmKCEiP0Oq z#B){IsPrI6Th@cO>9!Btej(79XomFY*bm0uareP;hr>PH96vQ1`R3RgW2vf*X~!nm zSPPPNno%LxJY~-W!%M-tQo+04zL@S9dHdr0{mJ?{{}0=*TugP0q=Uy1qq2F*IBl37 z6-*&g54}0QXOWELmXgXEsFGHZ9uDX9P~GI-sRsc!t<<-D?abs}2)Cy6Yw`}(f4W|&TYsEm ztOi#$+3xjNR-K1QjEa%j!&r7&GMhe5^bJw2m<;NbqWlqqs+HUDmayCJVpY`!a?@*b z!$-)VA;}eD8sR0YQW^77nKV2?AYt>~YBjM-M5|nX#J#!LOr_h&4g>5bD3z}P~J`Q$O78q6o_)3yzQwKKVEaclpT`t;WO zge?aV(!=sSieBTAI6l)pP|+aXwc=AL=$^p&&ezK4X}M-b&G7gz~Y#h-cQ6N0~C z_UWwEU~UF#D091DZ76>bB^1}nUba@4B{hvv-CI_ShW|uaG5W}DlpK8$7vHzqs$$$* z%F$J+iG=514WTCDA4(V1k4PbyiCP;7AkOpVwOKQNxrin2t#wt-`D?a8O$tu|-v zI&-pJ!S%X()%tJ`iLgI$sjQOTq*FRhDqN0-M=^bcTUG|pail~sva0*#Y|->p+O0I< zTGJcaO{e&Eqhv;_17@`JWJasW5G`tKh!$fB(&}u87Bw^tfi9(m#g-WE;hNCd{C8;~ zeUBVkQsY<^j3{Arh-hf5z0evO7g-fm(Lcr5)_z)8{m6zH+XCxVc?rzPx84PvW>bCB zeervt4bAEPLLlycrCM;+%$6>$-I#ndy|!DZ+c7u#R>PH+^gZ_ry$=WvJhW&#@|%`* ze`1w-VdG|w|2C=^bu)e;@ZCUBw00{B(AFcex3w$RZSuy%(@{#Krr#tV?Ml%yO$rTP z9HpUqqNuY$WIJlSzjxs9?nB||&L(2r6Qec~^Pi?1O$JTUJ}1_(oo#ng#m+=l?Rej( z;b7Yy+wmlhJ4TegM6Za?j{iRD0+pzp_|xZZVZ1B9!wB&|Lh3#m6Qai_b-4I| zVJnk^nCl8`d2ZjUWiw^hlu^r>F@7LoJSCI+0P`D6e|AU2uC(#5*aMNg_rvUa!A!7r zI)>PF^)P>9Zump7N#6CeT)eal8HMm~=mIT;pzLWSDrKND8?9n8QdIF=G-#(zMs5 zVcb&Up#^SghZYR_v%RdaDBS{gLQ5)2-i4C2aA&E!i2`V{Kmk5i2$&o%)6~^_+p+Pf zAR_wPCyoYSKa`7n{qnQ=EB7YApWg$Rv8v!CQdkS8Sq1*sz}9BhlmzHT?tA13rVfBY z#u)pK0roMAzO&_O4P;-$FZTl=pprr=##ENHLaM{$H8bwh^i&|`z!(MrHfXb}g8b!fsjE(7OtxD zVJ2djk?=7w3jYG7F9fYg94NPk)}X&%6b*#dT6Raq1SEh&u{*Z%yjwD$X;51W-lX%V z+8OE;ZBe?5UO-Bb@GwQKnfQ&R3pZ*{)DXr;zMJtYMH`w(hncMGOorFOkDNlmrzBM$ z0PxzylgW!{AfJcw0XGwyhuRK<-{ha^+kRWj>>~AZn4&Q3>kE`2p<2ssF%ylb2*bD_ z6+KRh+W8>GEF))zqL)mJkya)~28gFD6?3ac_5i}z1t2XccEwi+4ddUS{Ju)g*U6#r z)8Ji_I!_vBvVR&m^)db%6obghJHcEschO1d#+3g@BzTx;mEXXFgBf%-`EQ4G87)&= z=vGh&wxq4C3)YS#mxi$p2(I;z0Uk#nZeI$vrh=_PpiOA&1J?9%>~c(~SeJCCof`y4 zXRo+&3Y!iHw)?VXuvXtNnhnIx0HgM&FQ1-1ml&No 
zop!7TYKJClela#3T`<7w z_*d1Hc60-8+PWPEynLbf#@RLR`0A&2XM7d$!!yUH`XtYiuQ}ywPHav4HX;mQ+4h<3 z@n_Pd^-HDAsnX`f($*D!IOFe~UppTF1=r7k%L3dVufdkO(6}Gt;k%ENi6)f6GSV7FXb^eAM;QFYW-I)j`w-L&y zLA2_d5=T%ZJVV@#^&peu8)i+3id1;hoJr_Cl-e@9s6U*kttULI zDy6T!6S#G&I38xbNUh!j&+VipC8<`N#UqgVU`kpMJOA%AE;UHfl?#YzifajBr z6!@X_u9eNRigKe&pp^e7ROH*V4k?x>DW=F2;%*%J0oMv9nC*3udhMw$K_3v?qQx6b zNXjVhU%>OK4m6AwQByD{P%wu;!RR$qIH{DPp1|KKYv;JK#WgRkm1PbLW{m&>0T)f6 zK^2?yur{cEQ>EOVd5sfG*GomMvSEpbv_(19ejVlyliJOh?T0f)d)} zR&`dWEDoT$4RSuJvN+76OPDW3nt6cPqRgXW4PK;BlMIT1B?}$Bf8@FWeU`lA4oy2Cu!4D|%zv~-VY*m#{?wl2NU=BOJ=C8`XIsoyx;|omqJ+7` zBc&i#n6H<~raI{uT)SFL6_JUu#xX`Wi4;Y&x?iCk)rpNc$ryz%YzvFCJ|L4Z%8jU~ zo&lUh#4Rh{_^C@k;Ykv?kmw7~F8si*q|zO93o53bB3~ZQ5$z=MoEK-jhi1G)2gy_Z zUy<^sH08gI=$P_WC$t4DSS#3t*WGDHuVCuE%@XM7nYX1o22-Z?Df{$=+18A+LX>cY zZSurJOKsh$w(iBYp843)o+GI}M;7-S!<~^(y<;vm_oPrcnDz__HVwPdiYX8;jt|dn zNsP?(&&L)`!QN9O$@l(v+-KZF(lCf?)}E{Y;tohVWZYGR4A!l6!1;>&@uEa}65D+QPt zDVUPUXz_N9EC$Mum+u|YiRN2+ZeOB$a4vK>kC@JAxP?@;UQB~FbYjPfRg+Dt>ZXV> zQ{7bQDKHckY~Si74`xB&uRlh|@&7>1-;whZa`w>t$A8G#l5ZcJ;iylmBSM<*SdK>! 
zt;xxhdhoxcB>zQDpd zpUb%XuR3NNuxIu%bO&dA|cuoA7*@;CH z7Nm>x8qlkcK{3UO2{vqJVHOHE#>$KH3&sjut(Pt%CObOZ47PwNu~c7r@@5SSBIyuv{!_nB0V0LX(Jg)5Qx@I-y9J6h6$EcbyrjO0V;0OhHMvOG16Ck8F(D{o~Im3wE#1!ql~{-4)c-w9iFr^LG@@{sd9LyzRwNaER0_KWuZP z?<1RtteUl``l^ku&>&FEsD)LK<|55PMgix|7`5Hfu1a&W=_HIbo_O>;6djy8ae^mu zihafTAJej>D`wGB*cZt+NRAONmaU|pU=%hFAZ5qn^Q;q9ViR>p79l3cwm2I#z=Ri` z0({9fFQ{STA@BJJZ>AJ_DuHN3{{P4!Zhn!qkAxWbtMu4IzPrikCFi>o7NBJROFlFC zO31mFoa5wNqImbw+oR+}$a$2UC&)QZ&Zo#>@_7s+eT^OyS(PSMzIK z!~446`#S&oy2kf)tw7cc#!LI2-*;)~`5~dSJ*{7NT~}mi$QaF}L+aAUpZ~Z3Ei1G8LeK?3$-)0_s5+xBxivp$!Tg+8jgFjAUtUDD zuwFCk%dN->nLAyR){HNR442E+3p)SxqJXbR(7CVQRq4mCC)=efr$Tcj9Je-S;=oEn z?)IF?!1;47BiEkua@>}jNzaw%9C}z>$T>`$E9WnUV=LiGavmeM9uj9o);|`RxL{TX zCs&NeEIsD@L9XP+CXO@5D{koU^GDMrZWnh=M{bVijGQ~?HFNe{(15I525ykcxy;!;B`42S znGV;W{u2iqP)z^iy0BfTA3Glcyo5<-Bn!5b+J6~%d)_)lmq$149F*@Cm)ix z xOeC9wE6dedxIQiy@^jtU^*X(6D!Ncy_O8V{GyKZ^X-kEmugDhb^dYvi{~vRCEfN3# literal 0 HcmV?d00001 diff --git a/scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc b/scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f17d278138a305c5ce1009c639b494192afadceb GIT binary patch literal 72656 zcmce<3tSvYdMB9menM68eu|P1LP!Dx=>0~31VTb|fC#%{Ep-Dd9B-eK&vM{SQi>e=0&6BX*JNptS@dDeIK?s<{b<8I5| z^Zmcb%B%zxpq@EjN<>CQMnpzNeDTHijM$u>X6JC-d;Hh^o8IQQzoH+?Ws)Ph$DbNG zZj`&g@wyhSSr^uoP+Z@l59`_85H_&8F>GXaQ`p4r=CGOFEny40TfEU!1H@CQ&-C;M2TUtEL8R1M8w<4Yu&Mx83o7%T1O-D4lw9&0R<2_p(O5bo?c+CYvxP;FLd$)4@FAwU%YpXc^e>tfOufy-J z;CJZ-U3fj8nalB6x!iy|T*hZ3p2OlB_*}&ESiGFiN4$W=H}Zvu7qNH+UyS$~7T?5| zAns-H&HP%#*Rl8(z7+BGEMCc%A-;jdtN3!nH?nv&UxD~07TTDYoW<{RY9F-n=kfajdw!hvA^rl3pWu5CzsTZkd@tgD z7C*@c5WmFY?R+2Nms$Kdz8~=yS)AvuAU?q29sD5TLoD9O49`P@*_&NTIh>x=PdH$CW{}PK| zIPc@Xyz4Z_`O|6ko__)J^uk(BfgWX+T-NdWQm*2we?tIMp_3dvLqo&io^b#0P^b;R zZT_BcPwzlaC{&@BtoRWK_YVdnD}|Bap#ZWN+k3*7aW~WtUB}H75QJd?5sMIbX(SK| 
zOZm+$T}>xi+8TY$^*rCw#QRR1Y-~F2YiX478=IQikJq<0J@qJ3JlNkW42OmKS35Vz<>!b~#UZFo24pm?5>AiBPe_+6u=mB3~@M6I4?;q+5RRyn0PM8O`x>I zf~igG;+!1M)#??XD#0R(Z`VsYds~G8-+VnJnL}YeAVV@;6Z*pd8zBQVuBVCHDMXQKB*tS@PTo?Kwr6NV5qTdV2Th~ZeXgZ zVFtm2EJ79qnH1z8K$CKDlZSvciS;N8dguXVlDqW<9rff3y75f;F1H1b)l2Qt-$v4E zUAmS#NBP;+ZdPK+yV9-gwq%+T1FfOknE=gp&3dOiSLS=FvIM3IPDHO51?oWnqeGr~ zf!^UEf5g0b>()J6w@SL_E6f6oy<`{~z9vvJCBsO#m!>)9UT;9M^<%O!FlPP9Iz=8@ z3rhI#RH#5H)|Yz`8Rh03nK$>ny6=wl4d?65sG}_AsGN0F&a4$3l~KpOnB&N-<4Dxe z9CNhKI@-nO&WL9(L>)fS=zDtb)eiTvnpd8ccTEfcB-7QNfsueWSoZ-4=|WCFVB58l-YL_}(5o27SnFxb-@7HaW;eNhQ(*nH83$&v5$ z#)@hm6xEJxzq$9u-mmQ!jeBPF2$G+rG++HJFDt!rw+ow*Y1O&bk4$R!nshmjI0WOR z3v`<(WxtZ`KwfsK?dZhMXYO8xvb)fIUEC!WC$x1AG5PA^l$l4!tWBswa)n+X+FM2& zHHc|D)!Y;Ep~HpkNN1k`(iW|mw|K_O?qtR+#SbjSaaY!uGj4Hy>GMBvdFCDIVR?=C<(1CuLgyAPf_$ZW zgZfS|S-ZZetfHM*IXjWttC<^S&W|1;W~Ng&qA$jv^>cd-#azGcuC5KEvK#M`>|HdW zn-~FNU@$N2qI7exM+gP{AsX5Z8@w`B%3z7U;Bcrv+K1uiY_lD16xVLyg zfSq4;e!YT{>ip8_b`iqbHQq?*YuCgD_A3dD$jdGj7~Al3_4O<@%HlO^l``}+v!0kn z!j~#{-PK9fz)&k{L$`*e`o@Q%IXj|`ouYAP@)Lia`DSMG%_7e?Y7QF@$r9|j zPHhqDDT&RqY>KTm*}jNO>SQzO3LkL@CNLzpO&wsj89z&Wda}{t#n6NmA3{`5I}(;& z?F9HsE{q0^M`GfZEVIm@J{tQa7q8a}C6uuY>bvx*=!|nr`a)H-({xyJt>wCO7xZY0 zVP~S1yir>_(3@G)N`1R)Nd@?s#MB@?Er2O&=UqJwhY%PFhX7VD_lLs1;VThiRd_H6 z!qnV91P!y1&yG&&N9dpj-Z@m@{bG#Ljo<~pfeBQCbUwz6)*PD)Lo2(JaTygUSCgmm?*C^$j%_* z5+cAsM(bBwf2lQQ%$_x7Pxz-gv0BU-YvZoWn5$&gRWfOO%kidT>e55+wy10SXmi}? zj2Uxhjk$M>Z`gm`KDl$c=ALmbuj#(=Vcv1E`+{i9oiqAaE+VPNJ3lZM{Hx3cqcD6e z^dxHlU4M2ljynGt=Ehgh2_foDBE5~eCXn6~j{9j^H0i{3JLJgBHu=HRGlZaz&5$+! 
zDl>p9QaW zc_iJn8^{f!WrYsPZyXK=h6ExZ*(?yc5d%YUh!V~s zkj$5RhWrEIe+ejzNWn6kSce_SDKH@T@rVt1FCxnWzb)-%{*Cwi z_2-$nV-0a<#?9k5j^D|8qu|#ICL5*$bNTyb!VmKsK5#ZZGH_XWpBOm%CKSQ{2eyhQ zA>x_7zpH+)>Ak&pK2*|BE_ah3%I~V9CuBl}| zlXDdL*rlR=I|J-0E~o>#Mttc-h@RK42%>s`=wSw;hE$8iwnUWxVS@@tm{dhvYcbHT zE)|Dz&aAYfT?KF@MFwjHc$GVVxG@3521rm0NgyWLab+MTdNBdSMoA|m!MCl#B=q3D z!d3*5xhDun^~+Ft6e(IL6^LM!j062c0U1WU3XIwr{XtlIKzI?E*nnL|1R%P$bhI&^ z5+Jh*#v0?9xwrS++VhQlV~rnX=KjQ;7k9dEw%%w>VCQYoyp10?D<0Xn>_P^b3r^1I z9&P<34Pg3#ts?PLCM18e-d0~@dauTDSpV|@s-6hRyT$||In*;KvxzdKu12Ly;B!^N z(Lg|ofMiR!O@J4`?P(Y_q7XyA7g%S}Y>=;1vL%SRX(>;TBr2S_3C_-`awi^8zDMuW z$eS)Bg$T(=&e{6P=X)w6zVnI5*2>+!^P3`hEkj`S`~BXI=Z<^(Y31*~1cBlnFX&9Y z0MW&zk%0lo-+C_#`_Me_%Y(QrqA^1+4ft3|LBJ!{7lemNh>=D@3^A`}dRxq~>w#m} ze0rW(SU27H-Cbh-zG(V>(YinGaE-PmdqFR7O=QO-^K>8=)0tuu`HOjeIHPb0cq3 z%j0cJujG)QBYeuP3%Uz7rHq0$E;ueYs}@nDDNCkb@GdQ>T}dx|u}Y`??dDElC9md2 z^hW@CFL`+LMSUyUL2K-YKB!aPUW#8OETx~Tut>Ud5&gMylCC_WFE5vL9FrQfVeAN; zC0khN843+Rj813M}-q~;xmb8 z-$GMVFt_NiW+Iq4OOlG(sSft*X)sq;&?l@vo>rHHW-p`_ayF+)#Nf9C8H{T6S;aemJpHX7T`p06-UCKeZH?URfU z{R&{z0kGh8zCcMP@Lb^#gJzl0QC3hl!LVG959*wr>%tiFvr)UkIzOJ9Kic%?p4>50 z+~I!hz{II<2d0|7H859PI~{m;nnzdfcESo$lI%-5?jSL2Or+OuDkM6O& zqMzr&I-Y~x|GQzsdf>S8Mqa-GnOV%B#*Avrq{hr@%%aAuYRtxB_OOX}V7YQi=7!;+ zOZ|NbQEhnmN?=Gb4-0)*Xe2A$X@!<-BcXsGN1fM(g)1S*Z6U4lOBSfUd?UhuK{>_L_Q0tP_$13spB)iWyIP4!8fYyj~NH2}_4A7hK%jNT3 z>K8&`B7ug6yE4Aq7SG6z8nfeR?k^utM6+_iy~;IF`9*kza3WikiV&ey zDV6H&DGd*6%WA~UAE3q%AsV8<^JtYsFy6&(PXmloX~^?f8devFDbzF1X-yJ%o%UCd ztCs0^mCnIC5|>h*mJqu98wsaCmTH!NL$G31V#zxgM=q60h!|EyWu#z@pza)~Ect5B zP-4kD(f#ct%UW{r7NrJ_(Fw%-B&S`bgf&G8ypfgArF@5K*b+1@Q)eopm6coJ_{^c1CXcQS+-0n zbC^wXnUzO1WeZdT8qJ6%TEKgj>HiGnD>Y@H1X%$JGE6T*yyRR_TS*^Q|C738fbbm+6I~6lGR^OL2-4 z%3Id(B|8(?hIn!f4kE0$Leb{sJy9De_79)gZhK|ex0J( zsqjW@s%=Na-75qb>((<6s$$aq2E2_)Ya{L>J^cd#zc)PWWm+BY6a5}}UmzwD$f0T?;TxzaFf(dJJi$BtKq8|(EP@lIkVQ%8pw#dtm40&Hr zEK$4z!+rfjFL;LqZzvGFIh&0u0Ulz zBxS4Rx=-@09qsiEP1L|_wSkM09V&Iu0^-0QxYRQ;5SH9(9rf)kzEk{h$*mSwQXm!S 
zKLtWU(_i(6k2h)ii~Tmsgoc!>*4fe2(b00Ut;5&JGUTdNFL_{9h>{8`>viEZ8aooB z2){zXO$x>-xJAL&5mcnfS{38vVf2xi$?!u-!Hv+2Jd*2J$H}%vm@4>XwV!0~xE=}z z2Af{)4@>&tkYquB_Y2S@8TtZYMv~C@9;rWdywjKXe9%UA8p#lo&r2p2mkfcIdl3v> zO}3ZuJCF&fVj&09Ji$(asJK5wdl(r}13CqA0Z;br{IBPKqj2Jthn`Jh<-v!ZgQLgd zMQcWz;ucTLk~eF~i&+XDSPGz9w6*Exv+`~q`}(noNUWr8wxn*RTP&%ImNZ4Pj*K;Z zY~>(j*mPslCeyon8GMlT+>o{;dD~e)04R(Y78rWiAj_8&et0u8L)&f4ZkjzgPa8z_13Sk=Cj3Tk5dfGnqxtZFT-kDQkaYHnt@qVf3Jn>IZ6eGtISOWrl zHnbez-an9iY?I-~nHk5*3_mV2A>NBe>XqODb&rXp9fhe>f?0`&4ZJQ4Rt2nuLCR2Y z)N?R!xz04=2}Zyqd;+*9ZF-QT{5y zy_Hp-n(~LpSkI#ape?~t!dsvRu~k#Wc3E&n*j_f=V2FlcAi_J6i>w)kN5Z}kvv~>$ zzf3u^C?Fb*^~*BmVN#x^$yHi=y#E~~Lq`x0N8vP8Kce6xSH^+~_XRVTk-cD{n3eNn zE!Ze#=W+`c929eM1tkk<6iesq>5p89$xMh!@S|SpaQxXGUt}-ob`Rk$U(hophC@lA zds1-G#SIZJ#jftHLu%gcLqkS2y?sb!MHF+sYSQ|!Mbiu>5tcJNjv%v#)=aw_Xd5VYvY1vi&d_?kVQyXN*6Zhc|0 zH=4U?%=nr!o>TOiBVM#|&YB-DsrtZLB=;6gmBe)6=)3Tp&uX?Qi^vTSTF4N?fZ-hK zSP2+}KS757hQAPL!f4#fO^r}kQaF*h!0zJO?Q(+LM0FTYAOYmI$M2xySLZ4rE7w)4 zvYb>J7>H*RpARKuN<}{CFnW@Ld&Cn%tuY8YR^EghcC>HFMG|;Nezz$fF@oBa!Y$x>QXE4lPKlrazM=O9MM;im>UMfLFc(r<=yAXlvww7 zco*+h%rlghx9MAQAY`MF!bx(W~JP@{YtFU^mMCvm6uU!ysTSU$9Mc!WDQ?J z;O^|_z<-cTja}^z?|C+)gefp2$n%`21Rf(sMzRwoa8_EtE`f zc@VBM?Q_Igtukbx>PT8O6VHaKSq>?!ek6Q(Sm=*1UyF$4Z~(?if;ZxXOgIb$Wo75} zU?Ad#x_Y3$m*uG@o;Q+ty0X5P>=g%whl7}>Z6zk&YS)7HNQFw_=F*XG*ejbzEc6EE}jTnf`S8LDcaRtQC$+T0Z-NmCKLl??v^ zGC9F`L83BbCNYATJ7$R{b+1qF?Sxo zt+TGJ)45UC?lEH=EN;=QqVe>JfvIh=&HH9I?-MuepE)n4w~iS;au!Zpn+(4dc{4IO zAQsofodsf1RouCDa@$*b-`u-kF=pDwEFg6^ZW*)B=N8>LJ5}?)`aa0596LIn;f-Z% ze2}qmD)U~h_+0l~#_6$!1$}tV!Kbw?miLAZhM&BCh}eLYu3H- z;;+9rmGyT1-TdDvj8*KNt=KzrG+NOZUDGsv6t)#PCv_hctbJqi>zgP0qXk=I1v|uo z9n*)$EksSea_g1J9iS#-xm(5Dty34rjX!BQ5zTFTKkHuU9pjyj$y}Hc74Q2XZ~yyQ zv$<^#+-;9sTwc}3dpOtHZ)Z+EA1kXD%j%~OMAsgUrZrPhORX4r0&!C z%#v7U#cXE9$DB27Q`}qnmi0~Rx9l-*&8)X(y8K>+=&gx*kB_I#7gxt~DyMqizI^xc z)brEAOeIYB_M8xRw~1LN9~<;*GRE!TAF^_9@4B^Xu4MaM=JuKLAGXfzIhhb4%$3wk zKR09f-RB>c9Gc6j|BI}mc#cE=Jl(IfsRdmTp{jz2Z0A^xYC*{!*z 
zKizoP(Y()ee~+HReK|;aKT}6xR=wlcM$?Zs=qcRjMADD9(}N%HC~D0x{ih5Q;;;lt zvLg&dFf2p(2KJt*_+kkHEm>Bic#tI~xA0>W5@`OUWS#$r9#AlkzYr0gU*W*Zt=s(x zadn>*CHVrGo2IOb7+zYr8M{J6&2p+P@-idSKw@>PjbLWnj%_P_YEaKPn=+Y)l9#wQ4G#8SpOIvf|6=0DJAj))I zX2$54K#G`^B^-)!=p~uOBvjxaB#lbDKp22nBEq_uYduJ=9ExFCB(45o>FC;_CJ-ng9B0!4837=B% zj|e1_thRN?3$v`~HTeAlk~y(dLz@8f&G2`^b0J}kNh)<^IVVP!4T=xBfw!I6<1a>? zYoSsvT03ja8*|Ki3MTeOJ>~FTfV|N|a>SM!EfZ;zfyr)hOY{Al`&O}?7dyH{zFW*W z{ekn$V+&Wj?o-Gf-Rnq<<&m6j5>|Phum$Cff`j@WIO@|)@1^Mw{@l=OB=}{}Yn8<` ztb1re$bCesr0_o}U_AJ00vYD7S3=NP!8+m-&ZbLyM8Tv3(m7q)3>b0T?!y8KH?gCe zJzN1eMOKdn@RD4$L6fRlWCV@LRN~PY05V-iN)Xj2ji?mR44N1~YbbU!fnB_z3qZ_6 z4~h_aNQCBERR=2@ziV|eT2Lm#comeblCj_avQN_Ce`Tbofp4nsZ1TcKx9Rj#aBN=Z zJ=q3$&R^#})zQ*+)ay^^KOnX72VVAWBv7v)8z%vcWQ3#!-pv|#XT$i!}|(q(8BqATF&?@(X`UhBLwN4lba@srkpzRd96dL6<=vw#o^ z!{J`m)MbU12~S}ecSBJE@*cFC@C2n~m}gUfq#DZzAsNhXCYUc|Z~6qjau^T=M?1hg_xIgBMq(Ly6GzOVeAzdW+ zWw5j!kyYpcy~Z05Tne0~6*z>Zl7p~+X<}D|~)@oo?N1sJ977}NgbSEIrq;SGO zaCJgAmAFqECJBU!rQ$d&_wt$`(dF3l<`pz=UOqf((OVpF0E#kDF4GSYfV7Q@7j##{`x6We`C%KDc7=V+*eZ+w3 zGSDwVCIZPmL{85G{gD8)uJGUipXOiU;*h3PDmx10p&0Ae=TH}50c=#kt%7e9jUJn~ z*vIyK`HK@3bC%M$D_^wc&!>CFH+^k*()KXDV)SS{J#V65qC!kxA4}gno4z@kUMZ$k zO%2W*o!NcwsOUH@8jmL@IbOpqbqKupxf1Yb{0wC?V*+&sCz0&ME<6d}(iNrQkSB6` z(FPSO*>pQ-vE4BNqjB8C^?-lelMO}zcBu_koZ1rZxsWg>*VYw1r-9Cxg%s7qu6CM1 zOD?jIQg9h8pp@={hiTGfO)(i(ke0m919=;e-*vJKTBE{W>M~@bPGz*DW-7`1qBR@& zl{H(~i{?ZBQjMFelsG~$l5z;|Q1F)s5|+dQNnRJ(5WB-L5R^S$hcCVed?*y+d)e@W zkrz-X`y^*B@>V8NF>gLQ|Mv4=e}2*w&E7C}1Z!cz`pF~TX^iF9JjkyZyDD1uPKOc9 zXRVnS`L;inwfRBT=CSRfar<-@f=@rTEYgPNNtwAsM#4FkmeAkKso%)Gw=w&$&hST^ z4&l$Wgg$K`uR`vsAbJ%2pjr;wblC)x*)oV`Pr|POzgB?Izh#}b2o6;2w-gTg*n%fJ zoTt*)s8~?frEcNm$MQ(_!xuQMRGHW9Myq95r23x07Eq;H83d}cA(dta5Qyi*0Chqy zp!Kx|c+uBI^k-jL1c`(mWpSPgjtNMVtV~fD1Wu%qib=bBu;=>4Kms6{F%J=SY$#qp zHV}4>?D4>zj+kTZ1IJp~Y2KA}^OYN~eC_k2%^x~4$6ub*{pu@I>*sQ-*i%5NDO-_N zZH&4~MQiD&@c)fDN@g7;M7w1p->Zyz%YG8dz)RlKXV>R(@8#vz*Baid(INc#VNV!9 zg{y*EhZfXyI=Cz%7zewI+XRf%aeErnhz(#1=(G7=lUD}NT4EVp3CxImi&!?2tIYX| 
zh{1QR1`FLV-i94pMlO=;^)<*IUqV6%?N;a-duRwd?f80oV6(Iy>Fk@<;5m(1+RYuW z?ik-RacR;Yb!?6~YGxfZQAeFJvv{(peRu>eIW<> zUHAA9t2s6Kb3vV)saQk>E#NJ>zoFiP1GA}57#;~qrV+Ta(Pm2~=DnmHC+og}p23U$ zo&!=*FO19}DZq$n2;jui)6`|eRF85~r`|q$_w0=4cP_*#>t`$L=PDamPhln#R0!*k z^=WWORxVT_0~-P&DwE)_dA6kCUT4&COf()#c0GLvT}J#p3m7cok_b5%USwT z1vt`Rkn)J5Z~}q=x=+Fvitfo4vgRS6b)B_v0W|6x(Xw!*=gdz6Vpz%sjXl_jyDh z;Tq$3;R$2x8yLPQIbb&wgy|3*DxeG+=m-4|AC60xVD$-cEG=p>!Sssc6 z^P$T-W{5k|=N!z%#-01CPWa@=o;DMP@sWwc-?2|`f44NY@xX(P2VNifVdIaE{n4@e zBVzC6xyF8R&5L68l@F`~aKmx0eQf94dsuD1azOFjX_(kPsh`~WTl*%f#Qd5MoVAZk z$n=Ss^OlWmBc^!#RMb%@8lU^l`nNazyG_&G_qWe&dJeynGZSCOE+X>S%-o2`N=t|$ zC)dguS@%*6Qabdou-5HK^d7wQs3*xKM%anxpFs$71uY8^fll1=9vvC~P&m=M!0yw) zHT3)59{Dcvr{oOsr>Q6e6{xcDc_52rkd8q!nGK%-=@?cFiWSeS#rVmGck-X5mLXr%d`Sgg1jE2aO&Drxqi@m zrVF@+U4BeTl{;OdR2FopheS4%L=W;x1f~}wk}g+(w)VssnNRp z`1xmp*tf(N?+<%h_wfa5&(Vbmt?+v%^=;v&~aMX9~on|5%OY$s5-pHg6VW~28L!32jJ#)$?(LL;Us1PWCs zd8Dl(Jrym??6Cw=TMO5bZ3zi1ned*$kd(m)X1=MT9gapYvf@v+Gsy$vH!Ct_I(ZLO zLgGwDG0P2N{yf4#N~E3>4k0Evsi}Cvc4|xfQo7M8_mEHC7nGc28Nm%*DHAEjm8JbUBpL|!y) z9WB_7+S4@Yx%iaEPx3^T`P z4v3k@MCY+&X=^mM=Bd)f69cg|RkLfVrXn-7(KQFe;)6574@>V2L=Sg}^_^npMKQBi zboTz0s}?+oJNLHZmSdtJ>h_MAWN~9>%&~6Hu@0{3@riAv3}06p0-YK0BXwS9A${p#}9rRAHV5= zV-r4p&Qj6(dilZ7%i{GfEO1<_{v>&CUC{UH zKjye$ou6(Mem(X$B2_@>8z;w_Aab2Zn{$;VSh@4}wm;n7BAz-UULKs=G&Cn%jR~K7 zAbbwhd;z{(xSiB@(CwnWk7^6)F4JxE<$s+@CG5tOl|N=2>v+xfsF=&$@QIh(w(GTR z@}7}X*>jF+(bzE4h2S6mekaE2AOGIT6Hg!etg6mH*rvcmjx z$&j_eEt%!7Cxn$n0Z~S)N|vX9J5(ixqWzy3l9m6N%P9YeVg#zuJ^p_XA?1H+=_G9) zrqIfuzO`IuN@XsuT4pH}8cLax+n6l#7L|ir=DAvHQhc^dPL(oL-mjsr8I?qaGKz#8 z{EZo69&5yWme~G1Y!Q)k2P3xguup+71?#LmDO!m*uoswYT#E;3?17R%CNN}S%)~DY zowKcgVH`kG9IbHHtz7_MkuV^f#{jZHBr^wW?PGV3=Q4DBcB(Y$sS@4Q&=PFTn9nGk zJo;Aao2}E%==%N9j02+QARZjjWzT0hfaBbZy)bcgkiX{BkPn_t(chsBj&cbw*3;?}G`PS0{qP+E4Ie!---VX_;3 zA=3oro5xHOl8s*6JrRgT*ZQcni=q#TuYm|I=zMmqu}v~I``tsAP9uhjdQX_yp!5^Os)GF@e2 zsl??KMqFBydQ;1CF(J_rbgjV7f__~jYfdR^#V&iR*5pTrqbB)<2vbXfv4=Aau}NWK 
zc=KWb3vYqXrC!e4I0m+ySWFK6PxWqLO`GTH3Qo$SkLva_pGgbVwAhl5>bDt2P zr9I%UWZP!+2SNuTS^KJ=PHl^r&kem&x8?H@z1{vd|ALErs4u}f7W)IzO=gi*C9Gr~ zU`7@2XCN~)I^_YouF+-{e#uC_U$FXv-xhP(*XW7TSs{2$$*qzqR)C1bSs6^cf{%om zkUQ(P?Uqd}sE)d~vc1~I55Kk-yvP`~^~$*U;*A%@{OV}>R>p0NuX}CxgyD`ue(cH| zKQex5>~nA&a%J8r71wNw7Hof*wO!2GA-Z;s8RuP|n^#}GI^I8-GqwKTZ;n;$pRL#* ztvDEU9TKgF7{I~PEeyXn_-)aiXo*90ATXhy=e;t6@4;b3*jlk?2uiucsXKNYy@Q`r zG-CB`WWV-Hx(X(oh}h4nC7$TddlTv(>ZC~RIhiOtd*R&q^PA3{KNs38(}aked*$5c zaZ`xDJ&B|PL~=Ya%T0RXNY;HG8=4%!GmXU-X^NQHmP|G>83;7I93vYUNyrQjW%YXy z58$P8h@KO{BpG1HB@D|k2#3NHbIM4Cx-2ZMkwyyEPy2k%(hlfF|d56W^1~I+yhGE{8Io@z5D`s2sz_wg+n9!Xz=A`@6 zi-}6BH)35JUB>1jNw3SQ8?vrUy@zIU34lF?2_M2P?CMF27Q2?x1%1q(V42#I)XOv0 z=7*{cO2H+j@63W!RlR+pA#-m9{}3T0S_<#5$fDr2oFWm&b9s#sJ?X_#mT@X0!G z)jCvFr&33qzDngcf(BT(7`x!!1=S3eaHxTkBV$^DuVKr5E2(CZ{bfpJIK`_}sPcZz zHynNH8&cPysu5Gv!D!G${gAp2RZW_r4)!$=NZ7IO#cJR46wjjh9$5S?8I#m)QI%>b z+Vb?crmo{@4#m=?QsxOY>VJnC#hB`_4&Ke&@jP7?QY|a3C?Qn~W+QXR%SdU(qvj1- z`II^6_|EQKb$}?9AOrrP3j|pfTkM4%qzD%gM4TP+KB-U6^5aWG50VJf0;&Sst4N$ zd;_rIGfw9^OwG7T0pOThWHCteLme>!yAVC*C9B+#u}COO+>VaD{YA-bC&pI0($gcBW?M4l*Z&By*s;%9v1?oq|WM zGGf-@*$ulV{J-CMyYuUv(G9y3elfNJ-MGC#2ivDY`Z^{KV`L5^Zrqesi#Etq4|r!z zwsx|y^8jVhCk+vFKRAwQSV~uPEDpkWsUwg9IA;P4L+$DvJOy%S*^L4`4 zQ3G3?W)YEfE!U#y<)cSGbY$RQuDCOkHtCq%)4_Z zEN?hocTBcM^R`CaH85WFY=v0QvvoeRU}EPRdtcu>c{N&C70s-M-+Ws7g2igHKQ`FY z%%e@9jMkQpw#~bUp7eZuL}sun0nRt_!Crl@Q4CkEwFvT{c>cT3b&2~CJC?Gro5nH$@%M?lQEb{2aC z)KB(Jo|(Ehoi%M1*X@iscRecMJUI(qE<@ga?m*PD3A^dZUR1MLhbKn>jnS;bQO9A? zcsNNpU>6?(up=h&fHQb#TI;TI)Y0liN&4xrF{%O;Ty4*|L_sjeJnk$&ny^ z(m&_s(ou5>HN>X?DnYV{`lWK!)K1w$1W;U-TjNw$Hfoyv`o+@zEx zu}vo140Ir&x+Zlqe5`4HYdmF$wUJQ{L=U1e-SW;efHxlOI}g;;uEKb-zq2i7@jkG47iUQrR$dbWFU~byiMj?v>%dPOX}=h7v!l!ICXO!ar4Fd4 zAd@P=ai*|a3^`3BAC3eK(~7UblmR4EfBY-AzW2Gk=|Yl;YrgfUUTLLgh>`O zZpoFT>J=&i^sts9PhCo-KsBX|8rm~;Db-w2Q_8$*Db+VyQ_8w(sZ?~c`hM-a#MXw? 
zFAS0;>yd;JhHOhh<`|NtT^J^BFUb;2M6k!A4Crt&mK=V~4@<|$;2`WpK*-1usI$mb zk@>F5t2Qm?5Mwg9A&`;jiG;^%;l+{u0Y8K;Y%}_#<2Lg^{kO=?0OnUv1tyClZ{C{y zktg>~Zjz0U=GTbs+PH^S&yKgc-|U`xK3cXbTD)6y?}>W~?i`(Lcvu$p zc8cy@anG7>Tc!+eTkcw>)1n&>M7;+^_o38z7aZoa^v4FQoC_J8E#q;vgS>4Wzo@q@ zn^3|8_0wugLaBDbD4WbrLP?AGy-aqy4_z&OJw0L4&~0o<_)}D(oiLahNMCv-<*W+e zf$>Eff)l7G4N1@Bo~^#CR#W&M?f*ABUx_}u#=0xGVgk75-yWZCHj?xKy90G znG)(m31S~mFjXpp4clT8E3hlcUe>9)ZDk4cGqz64j4AKzlI7K~GsN=gQtQ&NZAwn% zj`yqTo^z`DgEuGpY?-fks(*mmEUT7!ifKnF+p49WVv|wIv1+NOm|B!d>oOxu@6sc5 zbr}%4yNqD^)pjjHULKi-cM?sHgB2O_k-4-(=v#SZwzG$9qX>CBXj9Ww$Z{onfNo5r zD_J^Z#*6U<5l08(K z!O&7K3uPHE!xqY4rZzIlP(_j;JXF*u^L@WX1&NW9+{q6~*q8k_o`nc~CnU2n%Tgp` zQH9c2f=2%X$}luS07hsuW4!M6&RaVtc6{St)Pa-8h+(LEW9RETCl5pmYonQUqsQYe z54lXb-f-gKT;bMta%K!auzcTg&;0wYXif9|(myMIzg*7}ibceIKpeRDf}^VxZKGR5M`Xns{R zyBd3Xt%Tz9?vziKzE%Ea`P7!^x}DL2U7~w;YP79E@#5CQ(TxpJZ=>jLihGJDBBFa! z+><+zHJ%>#luQl7CbgW_6{KXJVN$`>4falQz3gJMu1^fFCr_j2C^Tb zp9XX_L{B;Z)fz{q+ee%a|6V+}hx2 zyq}72Qp6)5sC7aEUIBL|%jKTXW$da4W7Eq$wcB?H64E8x<-p7S{=Pscd{@UHT5H8p z1T70robe@LG%?$#D&cq0Q>-IOQ4s^xn+IM!FtMKPz}6QnsERtOV~*Xkj@?nmUeUN$ z?h)iwFLh+vkTeQ&3p(w-g$sgxW2##8vV1`;3+@cZ4Cs<8P|`%A6INurE~q=G(;O{e zV2;dfk<-ei{y2Dx%sYpAuX|q@!WpjDU#Rl7&|&HW1Kt;8iYWx$-Ag0=LSPX1AykQ- zGz6R?SE+t}s4Dz&_ysRJL8l5{yfe=hgyj6YWeNZTa+L8|TdZm}5#994dGTJFdCxY=3FJ zEfc$nOdHi%U;sLq|?X-!g$cA83?015a<&dw!|Dh z0YAC{wmar&B)jIfld+0qzXbuI@GhYPQjZEV6tK}X34e$;QbgJ)v5$egT~ z0>z8V14VWr_ZyC#hTnG98vZw!>?~3beHLMf_UL)ZDSw1~xD?YB(aGJ|QxZoq_~o@S z3!kwn{ya?|yq0}%a(B5&P{_2qoP3)Bp^RTo6Ur(Y{X143(gbd?)z%`yMnxgWoVb{| zBM>oNDoH52o~1;^sq92nLb__5cn2(bbkJF)+=2Tu)RAzst1wAM2%U!|1CF4Es=Rl^ z-y<1%gClrg!SNS#VwYsS*wc%7#c?Dbg3a$B)ce3gWIj(V91JNgw-M%(Z^jwSF=%)fuzY zKCskIe=fH3`0UQ(v7PO+JKJMB&(H2WAKU4Bu+#SwR~Gcm%BdJ-vzD@$rTl@V{3ovL zpExw|xL374TO zc|-x3AL2GG-|p41U-#?SGZ{=1K*c_QFf)GZ9(UoBKy~Qal}}@j}d>G&;x@o5+z zg7hFoBClh=z!lFZ$)4X9zmdO9K!qPR2j~Vmx47rBnvx% z5E0|$o`FlSu?g1ICKBsv*|86j@!};sw_Z%-j1->2(bqv*z_BG%Wu>2tNW9EJ1jomk zgi?A#oPviGNE9Si0d*D$4=6PU0nY3j=!bVsNVd%(!e6HPGnKg|V^H=MVICzIsJ)He 
z!UXda+%CCQ67{Tu*H3> z_&)hrT$`FYgKNAf<|&)?lqK?(&AJiI7(X7%s+i5H09EZWj~-pff}zJ8#Fcc=?T+c| z(^tg&#J0CEUwG(PBO2E{P0b~sNB5XSu*m~LcpVNd!NDMOB*=gp2bkbk5Z=V%*xD3v zn2jRcOs1o}jm0gzJ#0;!q3CLssj#NOivd5pbNb1{Z_#R|iW+G)twi6$c$Ll$Nt_%) z2MqAeYWVTOZijey$x1%{bPBM%lF^__CN@%%owink2UK79GLxc8Znn`J4pSV!@rXDz z`a0kW*5x?iwWs$oFlfJ^aV>{DVZ}93Dq8Z9iw+ujnUGOC`?gQG9MjrI6nuo^MNGIu zVT0pEEEK~&o|y|airKmByaflvoLnIe8lhM^m$hcWMKL!WIpRS~9ztSJm5YWDNXz#4 z4-pyVv`3C~dxy{;@`WQs5`&GsDGq6l7irfVFYD&KvVaKa`#&69{%JY{ghkbq>r3SsQ9000=2 z>;`mFXsp{ zIcsseWc{4AC|W=@r1-UTzHypfpG{fD2Y9)UotZNx4&OZBjuvl`Uy1yHNt! zRP`#dPMDT__BxkY@ild-6G5dfbV+HZl6DQ>E0ki#WmSIJ?M-zws8TN^oiv2(LQWEq zEV;VOihXR*(q(GTU-B3~(Hq$JbLoXtf}|x^a?=@@gAmn{B2Tq0XzQ{htcySD&-J=?$ zJvpRg=el1Qf{Iwa&ef=~Iwgj;D@{?7aObTGEGVr&U%;eDzRr=*lwFM0OEJ!HV6dZS zs+4@mJDxc3gq(PnTzKNd6B3aux$q>dR#l{tLK{fm)h1b*kNBETo@jzwF<;--apWX_ zqKU_u4)qPqO^}8$)XoB;9VLf`9+PxclH+nes0>J}f!ZUUvk4K1Y z1lbrf+`d0z-r@zS-!F`yGNBJaMX~UA6#u^|pe023k7&5?Jp_^+n+7G0ZDi6PftY{B zW(ofn#fTmhXa_LKbx9Z=^aTYRY1o6Kn*H*^!J555nFvL~EF5Qtge7AD)T*S%hx{!) 
z`fn8ccM2F)%0$`!1wU!KadwX3|Db1HR6z7IZo>B|fkYa@A5)MNd9Vup0cm71osiYw zFlt66UAfjH^uf2B{R$B#R!wshn%793CS@&Mz9^O9`EQ|SP`$A6*n49y%xiF%#24TE z;)C+TKXGP!gwz8!4~!j{cc$MwcH>yWY@}kU^X&_FFU&djElL;H?U*tCApQI4bIzkb zP0Rno&B6&vvzWGbWK%k&9>P!+on6B zYj)49d$?x*SmSFa;JyW)YnVru={m3J#=-OV%m5Xc)c*>Bi!_8E3&vfi+c=ge92 zKFZ9d6}*tv?-E+QOKI(Y{(iwT4==o;PPmVlDcr)tdw z7~Rl*2Xx_ZHkGI*vs}m~R&8O>jQv+s;#;nV!tS0_Mm|aNo}i^Wx64$g*Y+0hw6P6) z8XAB(fsqNzROt7#wYA9z)FPv>pCXwysvHEKyW9iH{5eW6_rQ>+FCZ7P%J_@y76mf} zOfKsj2wV!o&eQM5S-z4xMDj_PWG2|RkYvBu6NcQDFuY`eo)Dt0P{M&iMqx6??)EWY zWr)ugB;L-u6q7B@!2t5N&?~@daHGNQ1DMCItOqWyX!VLN?|g3I#D%G*hsCwA;vKWa zJEFzAX4+!=+GqE*i_h^7_jSw`W9NZ0vqfj3MQ5YA=f<2LI&$NgWs`x2nOj8XmUv-B ztgv#nuyT5>SXdb?+&2Bv!@^w<#>KPCW7(Cn*_Bg053{$zi2w(-Zn?DuXU*2O&epb$ zZ;56cAG5@<2i)ep;<~*LUHe4qzNhCkC_;H&d)YTEFLcxFBMsR)bgZE#Hg@6&+=>#X zjL@`RQ4UPx{-4kcCQ5;3@ETLjBMK(ZEYNKlk~Q4qRwAEz(LBLSv^^e0dr7jU5K4Uq z7?qBCVMi;?gCc_bG%fu1hXp9HpWA&Fo!|Yr95UaiC^$s zWoVPFT=bk|9ZRl7&q?aItt)_Ada8}{z~bvywY)idv!jz zca9i*YU|kOr09Kk2&RE*H!!N5WT7R=FH>S&=5~C~(hCw@vli`kF4YcRsd$hs z%LP3!w?%Qe4q70=!as%2D&bCBR^lU4^({{l+7xiKGIXcnMwF2aQjIAftG!}rpMV2Y zEk)WlKmAmxQJv99fJ_+58Tn$IMQZ+mIE&Og2XPi#l^T+F;w+L!t6bJ~SxeCbfdbUK&GZlWEClG8y>2JUnq7Z@+OtfUOMzb0L|q0UcBy&IEWLGl^XkTl0kwW z*;M7_!8Ba=vJ?FjaW;v;>8N`AW?B$0bVgM&cFt#!HKUw=j^2bUB zDB~q3KLSc{Fd*yGNp2?)&jXKx_c3}xCN+#$7RkgGa)HPic^fB-yjLrh_++nPvsSi& ztL#0X@F?;U?_$|}{^8ycrtz0bmagziGO+cEn7UE!Ls!PoY`X^GeH7;uu=DeE8oBJ?%+(eS zjaOcOC7QGOfoJpg2ggja?&I+^5A7orOY_dAd1Gno9;U4$PT|VeugvCbnruQab`+Z< z#qz6W^Q)$=#j2ZUtDEnaiR~{uJk}G=O%@UJ7}229y!&W_G$n32ERKv?E`nyq$_}Ylgv>j ziYNP~{BIB69TqDO;K{+g;NCoY9~+FB>DZ(R4j0(RHfJpDA0Ov)o)fwc)7IgWpD;n> zALnmqE;s*heSKDQrRjd9qj|p#(quL#*i2wkBsn`6$Itj?#-}rmolO=7$BfG-2A@r| zHh}au2&BJB!LL&AYZUyL0yh6>CJ7rUn4mP0>o9tVfv@lvR1pQF5($xb>?_>Q^bqX- zY%8RuKkL+i0s16IA*!sxTKc)HBh1pTRonucxvrBs^*+*fQ_EIvC zYE9u3YEjzaXNAupl&^Nwxb(ux%yz<0#qA_*gOru>X__wKZs0z90sG@)U-zw*yQ_Sa z=Qj&R>ee6Ob&?exRsEOXJeAltU)~{|))L_zWYU^SSwaINef^iN3zcXp>wpc&MpjX5 
z%X~4TY%(;}`1Y~8$EHKSb294K7sGbxj$`-iDfdkOEDW|ua~hEF&(t;&4#@(eEQ(RD zu1JZKp!RQR%;YA+%|~|%j=U6-O$Obk!A{eC#we#hO_i%n{!P@jx^%fUlu4VPxX(yh*_pDzGJj=RbJ>Zq4#^N6 z3`X3S6ZT$YKmoHnnAn}kvNOYi31%Zu(lChlszUvvM|{i9-4=R-%QXzY}S1g%h~Hr^mlZ9)yts;7C9 z0l6MKN2aQc_InKWz!HQI4>)zA?itXplMxUB4m+Fde-QK#u+faKA$th~V%7$Bmm7wM z>ZQU;1kLTStPU+S(-r9jSqG+s8pvdLqr!~^vG)k{$Dcx}R%}oN ztLa3F9XTk)04azHYidg&m*$;(np!JtGwv-zO zk|#@<*}F6nfAAS9lxtd;k+gch;N>$FUV(8mswOCzUzvHB2U(F9IdPz0k+#xptg{ zhJ92S5+wD=N?;&B!P?>;H}#wCOmYzR~3l^JiE-?)~|WoEl|t(MF3Ou4MQ z4Lg6WRu`F6tey)K>KSs;6w=g18_idEB9_WZW|J(eLt6k#mfp+5B)}7<&=ZmLN_i`u z>vEx|J0ihZdx5NYaULWoydQ% zrA6#G4H4v=^^BS<=2uK_V`;lIX`81zS=t^=TB>}N)33064Vrv);{G<4cv6$NTWolq zC7#tJ?h+4Eb3U-1Nlrv%3K`M*XGSiid$&y*_@qquszvYx4~?8Uiw9MVvc6R`v5l18`^c5{aSnuJ=}er1*N zDLHrb!b^ZO84*+Fo|h|y;cFor##vd(Bsi6_vK_^Gdxo@0K+3YSDJ#Uz>g+#+$rj1d z;B%^vJ)-*Phis-7u`0qYW&uVE2NRJ|1h)6R0chi)GRFoV!bsq*4vHn_5I_q7BXS^r zgK1wSnf65-m6gn_k5-B&s28@JK(F+nLV$U5RR#xohH#Ql6TV+^D@V$oIN8{QgMb=k zi@^*f`EY$_L$j}=<@u)M>GK^8^=&@Q12yxB`qRGSO>IXzo6(Pi%Y`EdWV7v2nZaNf zT^3ZWiqRKkW4P-O#^%#e?kCp#iPAai8WodH6wOs1WZ3nPng+c7j$!6GmU>W4ohbTF z!%P-SJdjLW^PS$gy(d^|o0=--Z+WL~?oc;NKCMZve5c`_T*VPhYV|w4_vCd1!ZT&b zNm=nk!BiKkbXziG{?rkcRHsdX;tNZac3G;NhTlEXlFQ9R@wAa8ZBLdgn1VAua_v@= zXp!53v=6KYlgM2`ss9QqjR!=bdeW-FwC53AgZ7Bto!ITev}dM~{Y*AQL896!`AKGFrOXp(X0>pPpty~KRtm`Zr42p_GAlaL7h9i!DQUtzGS&6I(GQmZ#_%oNWf{)BAnqJxIgU*|_P#MN6I(V` z+;WDJ&dRv6a;`&s=)CBxOgJx09ieB>FB{8JMqAPtiW@^pzcPlFjUBgao}{fLZYxRX z^yZ09D71pwkFm{vT!+X;Vj3B`hIH2p^iIzHFLDw75D&sK2uoLC2fcQy#NR(d$f8Ub z$dXn9IR^bBQ_sozc*m~LZ{Iu*jD9tK#2{Ql`N9_|XhVPt?1Maw0)=M~$f7_QK%-FlE(F{$03Kw3 zeV|K7#$Nps$N+0A7=CDmSK)Va=N)vkEeF44S;Qas6UJCULy?y5+- z_KL2(b5A8}_Qq@WlBZ^?PYiBdu+C44{ z3#G~LJ!ZjMG@jW~z5q*kdHaExld!i-Qe#QpOe&qi37n$Osojl`=|(5k%bx@KTv)eb zy-=WiYxXfBXeISK8?5v2Cz?WlYMnWp^i{`w)d^S48^#Y^Rdc;>Kl!yM|4VSuw-{SK zdOESKKjAt9pkI^i_1J5%nN!RzuyBQGIzgjz>gc^4&WLgtX$Z3dH|}sc?IK}}8qAtb zG%OaJz%(*^n%pQghPCj_C$7^;Oud7;9qB5Q^iJ`gG3R6cGsK2=qGmEEi!wftmQ$;| 
zlRX*5SJ!_voby@d>$WDg6QY$N=78Yy;!}`$G9as%l{g0p(<)}u@eE2EsL6VXjZ=A? zVHb`IS8$&-r;1hTmf1J+>WaB0h2O~)b0z)0ykf2bkQ_lG{7x<%fc78J8vsi^jg9>s z)c+nv4eqb2N$-E3e%KrNhi-ES#Le4KLH?mdjDGwqLE{TQ_YQj^)U4s0?C0tk`LzwHD$)JonLfS>E z-T|FXq5-$kcD95bke1M}_q}*o8Rzuk$?96>L)Q;>pL%_OWy}243a_Ae_5qHfa5mIm zfBChS0pZLhnaVg*Bv^G9&a}@OlcD-}s2*FD-GAA=5_)(sx)Ov3rNdKQt2XEKMAAj6 z=biwxgJ|4(?-;kD0QRNmy*&UgLti{weKGg!M>GB zEcyzSZ%jJ6XSv;b6h|V>uuITu4yzLm>8+_Oh*AKU5xt&qPR(2;2kdX=>2l_CStaSx z8lvP<0<}Yk9Vsw8DlQN$)|ehsZ12gAV;zTj!xx7du_HoMmgFGBu{4Hd1RYpBXrN+9 zCE<=WhAU#F72)=i+6IzdzD>7_vu-o7QMfa5q8m4hD#9lshdUzSN1k8>Bd0^*V_n@{ zz2RS*!zVgBPj>W@JudZSLBQ)0eu5xfqRGW|(sX!07`bxkayp>hWrfMt(OA~cSQlfx zi*+n|S8Q?}C-OjMc#0*1=c!>*8J3aHXxIv$M?F~AnnYUKzXkYT!rna9kuuq5_|L}x z$}??QwfkqDPT0$F)#fDE$C*6dVyCM^$-u67U{}JvTeLk4y@)-^)ot;>wuHR}cn`SR z>PdM5GbS;(ZJ~N0xM*GqESWPVgXJ;)V?#bs_xxOdH)#HDd&^pq$;U5N zdSvEGF*E0IXDk%60x~{hqnMp5D7x)HOd7HJQKeF(7F2745t|>)f1nXtfGs-92_71p znn&bE*uuv2yN~1+L)OSAW;g55kLDIbvuYd9mP|UG2<0)_+Uv=&2dr7Xfff`nxMo=F zA#P@LLDkVjcd#0eRs7ZiAr1lG4I*+>^aT^;Zk z!e@z)e2xMlBoa!oIVWBC0wq$3L)tk$JOU$MnU<)g0}SL#lE```a-l$cF?#VIkO}MR z-$!Xv9Kq|BeBD9;)wm^ywpz(K#?97vi=5Y4#dk2Bzy%3#g2T+!sv%820rL3Nrs_XUF>SvE9yjw(1 zBPdOiL}?HUb@Hjsqf31^9u-fW7JK{2H`rOwn~k~2-!r{YMH)I5fY#Cy3$T7cAhgrB zyB`a1g-FaX_TKMu13ZuAC54R0P~?`~o5`mL$c(KtV`5P=Q5*}4TDiht#>S$cG``yo zir!0V$k>!I2XG@vbk~@BpPwnSpgnZJ02oh5ho)BYJgx~*I3!mIS)Vs*P*#~^X5i{u z&ccWk|2F*FBXH*ob@MzjBN}sRDpO!}^Kvo;a_M8PVyL#`xle&vfl4@3tyLn@m)Cc- z)hN|$%-!vQFEo{q&^@@DHmIuq?q0{3Gpah!l`w`o`0jI2s|Jb~bt~knQOybUwMN~{ zLjd01m`AM*RE#TJJ_y zZ>MrTRK?DH(L(kuQFFA4^1$`%gQbw#ZO>}A{pb%lm?-B`_K%qR>y?}~Xt_FVD6IfA z<^7&f4S3aOtkFUk1ADjXl&s)60A!rt?=@p}m<5A{jW~4uOU(<{olz@7SJZ~k9knC$ zw5XmQU_AX%4@o++j&*&&g;F8i+sZ|}{dn8b@fki+7&RdDMa>8|c*D38=-;Y5L%R=j z;A$}E)escBh*irvkbYCtw85Psd|O)7WF2^>7H-3RsTNaY0VXUq#Ak(HTbWM)J5te z4UsL7K$s5kV}bqyO6tCg%2)>dq#1x6(fnvYyB-Zxa^dyJbz5juZ033$>Zrl^1!RD6 z%%SU7IyF6w;ysb~#8og;I*Ny^-yVjMbSCr>N7 zM`ywclN=t$qCfl;V7Z2e_TqPBgtnAO;f}QD=;k=H;DGHyc=!c)yTWazMFi*G!-LO^ 
z3x|-k@CXHOP;d~XsxB@VI-{4T1PEf*Ghy1Eh81}@=9jmv*me;W!?v=N%XQ<=TpSx4 z9s=Ow*u;zbVue`qDOco5NS5y4fl!Z2=LX1a2CP~nNibVUkpMGYsLC7}7l$0rP3SIa z8cYymU@^sDbkcTN7ABp!wWXW1n3H%l;@Z;1+C42RnI98gqW3}2C~3RgY)Q^yXU#=b znAF9gHjJ7IE77cWrh{~4KzaAr7Ly^O(jIlI4y`AwmvQ|iX_refq&W_#P1rbJfzNcg zCTW+l3np*()t3t=sl$*=PoSLuAw~gf1FbLxVjG1eN)o2PPha&v(JOY#i&2szG4%)G zZ|JgD>1(i~R&IQU9wK7Fymh4WnfikyX}&;LcTn&m3Q7mtBfVXn?T2~?P692VyL})E z=!C8l#|0we!X*m6Oci2mGh?9th)(HkOUu3?Nh;Vr~ zmEysauWYU^?!zM=I@~iJRW$FYnd(egohh4tHneOjUoEVfJGo#@6droxaK>nL6s-p9 z=PMVU6rUVf4qlourot^Vj=Mh2?R($ufz=;Y3NtboXj0DoC*JFv5|%rc56vC+B;v@0r>2cGE)l z(&^>a9`V$mc>bB>`HA@XiRDuhDR+3TeZ^fZy2B_SWe+9oo8$J)w|t?m*2u{6zOR=i zingYFMPKdI+`c7Bwgb~)ZZ9s+9?n?uJ>I(plfwx&Bfij`X3pYDIbAc>>B-qW-xvf& zam_BVdbhZ{`^Lx($N>3%zARO?WfoWn2gHLt;(^CupG}Ymz5E@5M1T;&;!1jJ;@%oa z-rp?97&o~KQqT`6+LkdON;3JUGx><&rCJ~Z%9tpQr^sJR_qFbfh0d*S)BT>0CWiFn5Tn>CQ zLn*U+ralE#qaC-mA^p^&cUrl8_l#xPShtEz^hYHIsu<9!ki^rCI&#o3X%?~aXiDw6wi-%?or>g4T>b~XMlyM>TubKDuAJn^{ zUGn2R@6la`ANxxw+>wWHow7NYU}?ej`w^7=CJWbCt+bN`^nr=fu$pfSw_kqojDAl` zir;@{D+zL=%FhZ3iy8r2+Txe3Z1F2EY$+zKYbhozYbl0RZPRTBV)rg;iTOhb{Ko`W zte2nNyIck46H;a$3NNZMUGr>lQt&`&zsS<&<-*WX@;g68X|zSJW@iDrWOGi730289 zYGBZ@SS0C}zCi6F8F}9Qo<q~=I|+)izr%>^pR`K+>*c^C=7@nV>!g7a{yPauxo9g10I7CIZ>U zmkI6_(Y>EcQU;)Gv3|DxTO?;A=cg!>iQ}5v^iy-nQiGgwM4n6{o*PaB4-`A8qT_eQ(Udcc@@(`_7Sn|LQkRt0E9W^ zWlH-|Z%a>|lvW=??)gApbLf=(HI>h+%Oq-CXPjv3hlE2qqPCt!C9eG{{i-jvJ&#R% z_+;yPp7TL-z_)QxE|~n(Y3l;t-HLvc1D+$746A3mRxc%n_PBHRL@En55k@KHm@R6S z#|6F^BsgC52G%G0z9#j^-?$&r8kik@ucII2kdi6VcQ@C$h_7zf;12sHb3dW~07{P^ zbM%cw9dJzV?6vfeDoEB5bpj^GbtSKvKyo09fA;@H3iZAGc-~RSsYw!gC2vgi(mLjj zy8FH#byRaYiJC3qQ%Y63&Je~vNxOSgNt#x?)?fSL5jK{UciETR4e9F;qhAqkHvq?! 
zxBVNxWzqyu}#mPx8wVHy27fJcP|3W!yb08_q5F_N`OJeKzi@lqE^q6c9f zlT|Tk3)Bvndf!b-`Og&mPYQlc0pUi(@`Nq0M6q1DbZ(3p6UK@=WU<>i)ouod7LJvL zhen6T8QXS=5Y1F6pL{>>`7Q8KVkEhSRGO8lNz9n!mMQ?N1V@s{$jOS(WJW(kjT!gz&xk;Y#NvOmZ0ZrfuFq~tI4h@)ta^MieQ%Y( zH&dv1_R&8(_LXB|eOsbrU$W#8vE-2@Pautj@mtYY^AVX^frhMZK7-6 zvUz{X6`Xc`=qgKjHqADy0Bl|YxJfy^SD$|4X%hJyxOPBAk4eGhYxA|{I}Q^`1R&_h zcf8*HTKDHq;PE61_f$PLHZD}n)F!;UrcJ9h_e}75 z>9x{?ZS&k?V&&dt+g^!x{^sz)k?(eYyF1bRh}d*UJakSRyd-`mCZ2mqbWiHoX#;0x zOJe`^nrk)Jo31q_y<6hmEs!{+Jbo&$;aWqguoUggSh&K*kGcFp>$C}S8oZ2lQrIr7 zQ(xc~_WE+ZvkyH1F<1wbn$d5MzUS$Lhhe8@reSXA?a{A|u9WRaICoAP0PZ9^oM>Dyo`#box^ebFS7jef{FpqoSiCZmeYD97UM(?;n+NEoXVAR`nyV z=jdMR?<|HXAozZ^BC%&H4yr2>EG0*uSK$291=9EwUL_JjqTmMu24MIW+rfCKG8Q-K zC?Q}n7X03FLPrU|Sdc+8$wV>eD1|Z>idi`T_1h>08H@F{1F?ITCm~dcEl<7xDx+VX zV4~&p)4#&N>6a_Y#0`Yb4fF|_W{=)GWEN(qk}D=(qi$)VE`xAAU(<9%oz~;pq0V$5 zl}V=wo5jrQ%Fz2O%wpnosq}fNEGEiW|FkXF+sUel%1Yq6T~|X@dkWVbEqrxhhLjd^&D>R?SO)!sUl1&h!t!HBxbA#?+eEaP-|qHA-~S2^IA5)|Jqnfm(}f& zn6ozA_h?6?14@-F(Q|lP$Dzt7;8)sNVH!x&bV}NUP*-A757v=yP*4341+35hAKWKf zDuxe$T{X>TfFkhlNjG&)+b{}*pCAX;i{C>AlKN3#=ECgcN}w@eZvxVcC-l}P>_Qk# zuXnxHm9lzgcFfd55MIRe5-&hgdnM44ux}S_tqX^jAMUzQlX&=Xad!ksPV!2cT`Pgz z2|IjM?OCjOH+Ul?M*3H}&LrB;iU$dg>(gq&eJg?e3Ht%jc5tcS-REz-lsIx)?CeL9 zGis8ymB7A)eZOcsu=w1&=Wjfn=zK!#!1|v*%?DS5WwXz{^{N=CoqIN8%eOTA(qPJ^ z2&u|0Y5nyeijd5*%CaFr5l$nYSOr5mmBhD9a?8${I~fRY2?14xK=mW_eT5&LL|P>I z^Ia}Tqy+&ZE%;@m1-~S|g(G!}nTd>85R=G=Dz!jah4^P=WSWr?om*FlcPB+;d9Y%& z3#5@;gJ;a3NkUZ80>>#i6&&HxC7S0}K{S;i#2`6U83}47A5cCfos9=2Ci=RrlME>3 zATNtrN#=Hi0eX0GuefTvG)&K}L=b1u)9 zg0q3QHjD1^xx(e@9Sct-s`rUi`xgh_t-Y}&@kmr`?-To;Sg}9(^DSHdoz>{GyB_9+ zAA``vJWL%Gy2x;frl;CLqSBI{c(bZQpwMvPABf7b725xx7!mN;CfyS{1#^0aw!UMo zT1l(5F4}(VRL9A1>`*=Fz)MN%NQ+)qjxK|Gc}kG#*vw`IxndWG*3AuHpl`t@2sSs! 
zs_SfSAaW#GgZ(4w0^c*|M&OEXVEow;MS~sN*3!zO_!B;lu!5%<9r_tEgqZ|f4!Ykr zJ4sJH#}wi5U*B|X6D+3WZ}zbTF|fnlf~m*A<>Z_1JA;6V@Aw_&3_RZcAoD~h6{?t- zMCz&<_y)Js{?cI1HA_J8Q6if}?jB^8fH2$)_7fheU>Iq_5Wj~7{k@9EKTSiWPx><4 z5hfm|kj)VkVLI{@!7Sl|_9}~-vCW;auqdD{eHj~z+DVbs!JV#0BO%mEZpk85B(=Z=1>v+n%-(FlEH8U|3rpk39OCG4xw55@CfJzc# zcTV9vMDXbFbRd->YDQ?(BLQo>X;Csc&eu;DIA6#8MofeZzFu%0v9N3F#MAP%b^Iz_ zYc?u^MF@%uatY_7`Rjzd7TuPFs_=++^E^a!#aZvhnJX!LR61-JM8LaOTSCvIq1 z`{r|QXsA;nHZ-iNFcR`n@?Vf^<3UMt9%{VnzBbgLH~89x+ExKPH4y8?j=p)65Kzvv zU*)Yc)nf=+$86eGMBkgapUElEwL=-(p)2qEO5cK@cn0}8`(DxapD|b8pX$$h@XfLh z2ySK82ZG&#TsY>MDs?VAa%Gfm_!j+9Cwf63>WT#Qb-}ezi>m&JZ{^6P;GF$b)ZJ5= z-2x5u9jQ0z%h;~+CE!M|yU&z*tCi>b@}dsq%Rou@oroJd6kOkp-QNVmninaevwo#n zD%*)W#q18r9of;>qI?A?h^m_+qyQT!)z=0sE@R5Uj0$M`_?tN>|6*QXwZnFroa=pP zfmU-;j{WUw%gn&Tl7QNxZo|hm(MCNitz@;qJy{~#Sb-#|mN-pP(Tkbcf~58I8MZ!+ zg?c+a)f+y30{^Fu9Sh4?MB&<6ZJjEdM>B=LqlW{sER!KTV)+OOIWW1g_n$3tCS6Kd zJ}xB@{)C>-C0ds?BR3YU!xN_jjdD&_I$$QSkV-QJh2ku=14!24&j@<4SB9IJ)H$|A zeO+_|C@mx#S8>!O-iS09s6Da#FqpG_OnEj|TTh_r3c9)?Y7c*A7zGc@pq3EkH%lw< zIW$nnlK4Z0E;SFe?S<+i>BQA*>4l!7D@nxtESRR_B%^P7sI>p5B7MEC#?FivI}5eOjQg)*>_u-jyv-c7m` zVrWak)|j+y6K&fT_yqh51NY(p1l=iTaMqCwHO51YVz5bU>V(Ak>f{@fV#(G8Pr|ib zbha+EFFd`_zZ4*?y)Kw98C+HXz*wwztXzH&SU;qSRU@-tcTNzcyZxqAMM^D`s! zgVW~|&KAh-(TZK4n*?r*shm~0>KiBLgJ18TJ1(}I6noE#CkI6Przka?c*|0y}uwSe{ARc&9w4Yv=xi`q<&dHs};yaHeJ>9D0 zIa$*duW4H;YfCuyLH7Lc-f8Qqzi4*Hd;Xf~j+DRT%V(!M*-z5n5cfCCKb-LIz!jk0 z@4dEn_PIo1O|q~dUf7TEQykcc6+` zHFs!X5A8CjlI@!Mc+D=_si=sXD}M`db`s+3t(^Bbf2Rejn%V91B`e{bi)OL?vI)p*H#XdX~K_|d9|T-bq}rRsGeLe=Y$uCEJa_#UhSu+7d# z@|potk~?KHkCl)?B2RyVJQyk|YE2SL{7}2}s1`O2qi9^Xg`2Dq#0f|M1uCkDJ2VR? 
z3DqKYFyNXEuHdHIL}O5Ej`FsDPr|-jN}Tj1Vpb;R>w#kW{dF=I=!1}f5n&EEc`TL@ z5as}W7GSoGA8~(=^m}T6-IW>yL7jT24f-L=2z;hNCl6BPKHxJ?fX_5)E3&>Sh5Nxi z0lu7cU@Zgf#HbPak@7X>0rb4^hQ|H()%3j(wJ7(oA~EDzL1`M6;I6_PxJB9UJ#C)b1oK`?aAsoe z<)xBkGw=`nNi#N7OJ@6*%?;Al+Mbz4v8-jGZJ}9owZCV6?;g-NNIstEM&d*bK0@FBGWx#GOF*Aj?fZJ4 z0P3oP$yPhCS7yxR%`SW8G^D7~sl8lnm%92;>Mp@eVZHVMY0taZx>gSw#EQl?cd3gGZOCG@G0UeH_k^YK98~yURh34T)$>mzoA$+<_9dGR+-y3qG?_f| zWc7dT2T^Uhh>&9=7+;7!95rNEwX1uHeSBY?E9k^=Gx`fjRrz!{%02|45 zvkn=6O&Wdbi0LZ_?6y1>NkNx!IJutI8_Lt`Vn<(Cp*WxTtbwWV9%SQwgOdA+F0&(- z#0vgy3bt~ZluCmNXTIs+5+G{0tWpil1h=npef>1EcAx;|d`s&oQN0P9KyRX20>oJ` zNTlE@1*d7$o}u6@g5H>450fYlKoAJl`CJUnQy}~<;T;NcWk`_MZ2lE!*a`y((iYVT zN!s`vJTwSwwM^s$wPL(7V+UxKOWWkZPe7P#Lr9|yMncOH{tY>@zWxIwhq{%`|7P3N z;Z;ZI%`;P-t5$!?>AO1e#z@K?xbD2>Ou775;dU$KFHZXF;{LijmV8^m)FIONvA;g~ z+T_gh%ce3?g{qvJST;Al?{X6w9^H5W=!~o8qS@`s=4##LE6b)5E&XbsEE%Y~8K{c~ zc7C&U!T8sE5P>7&((-BBGPHT5=P6qT=hz76y#0UKKYv{G?@d_20!y|!Y4ZRqJuhA! z7}!tq<1fC2G5L!-6p-KMU)-hOR|x6@0(-sPhzkelZNWPoeC`UD0pSV=+OdhW0}!s~ zAO)EiV8VurA@4cUBHmNINnZ`rAXH$c5D2~;2>PNlI6+Ja4%q9(&o!oE_D))H;WyE zC++dHh2_xu;@B{GwH_Tg2@_phm0k1=1F3fC3cT#Ws<|6Vg2FKDMh1u5;g`+^Kwc;Z zUKs;QFZm6J_Fyl0#U4-FM}>>$FJ3-3LS-@NHW>XLxK4A*?h8FQi?poWoj;+?PjBFo$*>FO1=JB2SN8 zmC+H{0-1v@C%PXXgCm`H4hPM@YgpkR`MX$t<30!Ec8DXS$qeTM=jo+BZe;HDr% zK`{lJDX5~LnS$LE^ignzf>$W`A_ad$0aF zKHwT5#>qFm{@81ey?*?)<6>cR!nhSa#PXp&gdkz1ho zcEr4g3=tcrTq%D*m&eE-0Cg;Z{V{snU%(iAR z7Tb^~igCH8Y$<;TS*JI(h+N>Kyr4f%Txf;&5T-ZEQq2R`sDcg;Kb!&p;MxF+x>-m~Vj@Q$?*&`8(Z`FuCO=C<%V z*FqNFzZT;8R;tZW)X4$uowe{DdMw7X#9MgRT0dXQAL7aJAMacX*^x&X3S2{hZ6BFU zyz?Vu-ntfI#d!J6cN<1{9zK^TTsw2x=cdcDO#`~$!?lm7P0@$2uPQW9s z6#;2&?NRv9<$Y_14KN#BYb3L+wPSfuZ(OT^a?)BW?JTeL=0WnbW{0?At%8<^Yeirz iGcBCaJ{?;&Y None: + sys.stderr.write(f"{message}\n") + + +def now_iso() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def compact(value: Any) -> str: + return value.strip() if isinstance(value, str) else "" + + +def as_str(value: Any) -> str: + if 
isinstance(value, str): + return value + if value is None: + return "" + return str(value) + + +def parse_positive_int(value: str | None, fallback: int) -> int: + if value is None: + return fallback + try: + parsed = int(value) + except ValueError: + return fallback + return parsed if parsed > 0 else fallback + + +def load_json(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def write_json_atomic(path: Path, payload: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_suffix(path.suffix + ".tmp") + tmp_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + tmp_path.replace(path) + + +def write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + for row in rows: + handle.write(json.dumps(row, ensure_ascii=False) + "\n") + + +def read_jsonl(path: Path) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + with path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + rows.append(json.loads(line)) + return rows + + +def sql_ident(name: str) -> str: + if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name): + raise ValueError(f"Invalid SQL identifier: {name!r}") + return name + + +def sql_str(value: str) -> str: + return ( + value.replace("\\", "\\\\") + .replace("'", "''") + .replace("\x00", "") + .translate({codepoint: None for codepoint in list(range(1, 9)) + [11, 12] + list(range(14, 32)) + [127]}) + ) + + +def sql_float4_array(values: list[float]) -> str: + parts: list[str] = [] + for value in values: + if value != value or value == float("inf") or value == float("-inf"): + parts.append("0") + continue + parts.append(repr(float(value))) + return f"ARRAY[{', '.join(parts)}]::float4[]" + + +@dataclass +class Config: + token: str + org_id: str + org_name: str + user_name: str + 
workspace_id: str + api_url: str + memory_table: str + sessions_table: str + + +def load_config() -> Config: + creds_path = Path.home() / ".deeplake" / "credentials.json" + creds: dict[str, Any] = {} + if creds_path.exists(): + try: + creds = json.loads(creds_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise SystemExit(f"Failed to parse {creds_path}: {exc}") from exc + + env = os.environ + token = env.get("HIVEMIND_TOKEN") or env.get("DEEPLAKE_TOKEN") or creds.get("token") + org_id = env.get("HIVEMIND_ORG_ID") or env.get("DEEPLAKE_ORG_ID") or creds.get("orgId") + if not token or not org_id: + raise SystemExit("Missing Deeplake credentials. Run `deeplake login` or set HIVEMIND_* env vars.") + + return Config( + token=token, + org_id=org_id, + org_name=creds.get("orgName") or org_id, + user_name=creds.get("userName") or os.environ.get("USER") or "unknown", + workspace_id=env.get("HIVEMIND_WORKSPACE_ID") or env.get("DEEPLAKE_WORKSPACE_ID") or creds.get("workspaceId") or "default", + api_url=env.get("HIVEMIND_API_URL") or env.get("DEEPLAKE_API_URL") or creds.get("apiUrl") or DEFAULT_API_URL, + memory_table=env.get("HIVEMIND_TABLE") or env.get("DEEPLAKE_TABLE") or "memory", + sessions_table=env.get("HIVEMIND_SESSIONS_TABLE") or env.get("DEEPLAKE_SESSIONS_TABLE") or "sessions", + ) + + +class DeeplakeQueryError(RuntimeError): + pass + + +class DeeplakeApi: + def __init__( + self, + token: str, + api_url: str, + org_id: str, + workspace_id: str, + timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS, + ) -> None: + self.token = token + self.api_url = api_url.rstrip("/") + self.org_id = org_id + self.workspace_id = workspace_id + self.timeout_seconds = timeout_seconds + + def query(self, sql: str) -> list[dict[str, Any]]: + body = json.dumps({"query": sql}).encode("utf-8") + url = f"{self.api_url}/workspaces/{self.workspace_id}/tables/query" + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + 
"X-Activeloop-Org-Id": self.org_id, + } + + last_error: Exception | None = None + for attempt in range(MAX_RETRIES + 1): + req = urllib_request.Request(url, data=body, headers=headers, method="POST") + try: + with urllib_request.urlopen(req, timeout=self.timeout_seconds) as resp: + payload = json.loads(resp.read().decode("utf-8")) + columns = payload.get("columns") or [] + rows = payload.get("rows") or [] + return [dict(zip(columns, row, strict=False)) for row in rows] + except urllib_error.HTTPError as exc: + response_body = exc.read().decode("utf-8", errors="replace") + last_error = DeeplakeQueryError( + f"Query failed with HTTP {exc.code}: {response_body[:300]}" + ) + if exc.code in RETRYABLE_STATUS_CODES and attempt < MAX_RETRIES: + time.sleep(BASE_DELAY_SECONDS * (2**attempt)) + continue + raise last_error from exc + except urllib_error.URLError as exc: + last_error = DeeplakeQueryError(f"Query failed: {exc.reason}") + if attempt < MAX_RETRIES: + time.sleep(BASE_DELAY_SECONDS * (2**attempt)) + continue + raise last_error from exc + except TimeoutError as exc: + last_error = DeeplakeQueryError( + f"Query timeout after {self.timeout_seconds:.1f}s" + ) + raise last_error from exc + + raise DeeplakeQueryError(str(last_error or "Query failed")) + + +def ensure_sql_columns(api: DeeplakeApi, table_name: str, specs: list[tuple[str, str]]) -> None: + table = sql_ident(table_name) + for column_name, ddl in specs: + column = sql_ident(column_name) + try: + api.query(f'ALTER TABLE "{table}" ADD COLUMN IF NOT EXISTS "{column}" {ddl}') + except DeeplakeQueryError: + pass + + +def ensure_embedding_index(api: DeeplakeApi, table_name: str, column_name: str) -> None: + table = sql_ident(table_name) + column = sql_ident(column_name) + index_name = sql_ident(f"idx_{table_name}_{column_name}".replace("-", "_")) + try: + api.query( + f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{table}" USING deeplake_index ("{column}")' + ) + except DeeplakeQueryError: + pass + + +def 
join_sections(sections: list[tuple[str, str]]) -> str: + return "\n".join( + f"{label}: {value}" + for label, value in sections + if value + ) + + +def truncate_text(text: str, max_chars: int) -> str: + normalized = text.strip() + if len(normalized) <= max_chars: + return normalized + omitted = len(normalized) - max_chars + return f"{normalized[:max_chars].rstrip()}\n[truncated {omitted} chars]" + + +def try_parse_object(value: Any) -> dict[str, Any] | None: + if value is None: + return None + if isinstance(value, str): + try: + parsed = json.loads(value) + except json.JSONDecodeError: + return None + return parsed if isinstance(parsed, dict) else None + return value if isinstance(value, dict) else None + + +def maybe_parse_json(value: Any) -> Any: + if not isinstance(value, str): + return value + stripped = value.strip() + if not stripped or stripped[0] not in "[{": + return value + try: + return json.loads(stripped) + except json.JSONDecodeError: + return value + + +def snake_case(name: str) -> str: + return re.sub(r"([A-Z])", r"_\1", name).lower() + + +def camel_case(name: str) -> str: + return re.sub(r"_([a-z])", lambda match: match.group(1).upper(), name) + + +def format_tool_input(raw: Any) -> str: + payload = maybe_parse_json(raw) + if not isinstance(payload, dict): + return str(payload or "") + parts: list[str] = [] + for key in TOOL_INPUT_FIELDS: + if key not in payload: + continue + value = payload[key] + parts.append(f"{key}: {value if isinstance(value, str) else json.dumps(value, ensure_ascii=False)}") + for key in ["glob", "output_mode", "limit", "offset"]: + if key in payload: + parts.append(f"{key}: {payload[key]}") + return "\n".join(parts) if parts else json.dumps(payload, ensure_ascii=False) + + +def format_tool_response(raw: Any, original_input: Any, tool_name: str | None) -> str: + payload = maybe_parse_json(raw) + if not isinstance(payload, dict): + return str(payload or "") + + if tool_name in {"Edit", "Write", "MultiEdit"}: + file_path = 
payload.get("filePath") + return f"[wrote {file_path}]" if file_path else "[ok]" + + stdout = payload.get("stdout") + if isinstance(stdout, str): + stderr = payload.get("stderr") + return stdout + (f"\nstderr: {stderr}" if stderr else "") + + content = payload.get("content") + if isinstance(content, str): + return content + + file_payload = payload.get("file") + if isinstance(file_payload, dict): + file_content = file_payload.get("content") + if isinstance(file_content, str): + return f"[{file_payload.get('filePath', '')}]\n{file_content}" + base64_value = file_payload.get("base64") + if isinstance(base64_value, str): + return f"[binary {file_payload.get('filePath', '')}: {len(base64_value)} base64 chars]" + + for key in ("filenames", "matches", "results"): + value = payload.get(key) + if isinstance(value, list): + if key == "results": + rendered = [ + item if isinstance(item, str) + else item.get("title") or item.get("url") or json.dumps(item, ensure_ascii=False) + for item in value + ] + else: + rendered = [item if isinstance(item, str) else json.dumps(item, ensure_ascii=False) for item in value] + return "\n".join(rendered) + + input_payload = maybe_parse_json(original_input) + kept: dict[str, Any] = {} + for key, value in payload.items(): + if key in TOOL_RESPONSE_DROP: + continue + if value in ("", False, None): + continue + if isinstance(input_payload, dict): + if key in input_payload and json.dumps(input_payload[key], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + snake = snake_case(key) + if snake in input_payload and json.dumps(input_payload[snake], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + camel = camel_case(key) + if camel in input_payload and json.dumps(input_payload[camel], sort_keys=True, ensure_ascii=False) == json.dumps(value, sort_keys=True, ensure_ascii=False): + continue + kept[key] = value + + return json.dumps(kept, 
ensure_ascii=False) if kept else "[ok]" + + +def format_tool_call(payload: dict[str, Any]) -> str: + return ( + f"[tool:{payload.get('tool_name', '?')}]\n" + f"input: {format_tool_input(payload.get('tool_input'))}\n" + f"response: {format_tool_response(payload.get('tool_response'), payload.get('tool_input'), as_str(payload.get('tool_name')) or None)}" + ) + + +def normalize_content(path: str, raw: str) -> str: + if "/sessions/" not in path: + return raw + if not raw or raw[0] != "{": + return raw + try: + obj = json.loads(raw) + except json.JSONDecodeError: + return raw + + if isinstance(obj, dict) and (isinstance(obj.get("turns"), list) or isinstance(obj.get("dialogue"), list)): + return json.dumps(obj, indent=2, ensure_ascii=False) + "\n" + + def strip_recalled(text: str) -> str: + start = text.find("") + if start == -1: + return text + end = text.rfind("") + if end == -1 or end < start: + return text + head = text[:start] + tail = text[end + len(""):] + return re.sub(r"\n{3,}", "\n\n", (head + tail).lstrip()) + + out: str | None = None + if isinstance(obj, dict) and obj.get("type") == "user_message": + out = f"[user] {strip_recalled(as_str(obj.get('content')))}" + elif isinstance(obj, dict) and obj.get("type") == "assistant_message": + agent_type = as_str(obj.get("agent_type")) + agent_suffix = f" (agent={agent_type})" if agent_type else "" + out = f"[assistant{agent_suffix}] {strip_recalled(as_str(obj.get('content')))}" + elif isinstance(obj, dict) and obj.get("type") == "tool_call": + out = format_tool_call(obj) + + if out is None: + return raw + trimmed = out.strip() + if ( + not trimmed + or trimmed in {"[user]", "[assistant]"} + or re.fullmatch(r"\[tool:[^\]]*\]\s+input:\s+\{\}\s+response:\s+\{\}", trimmed) + ): + return raw + return out + + +def extract_transcript_text(message: Any) -> str: + payload = try_parse_object(message) + if not payload: + return "" + turns = payload.get("turns") + if not isinstance(turns, list): + turns = payload.get("dialogue") + 
if not isinstance(turns, list) or not turns: + return "" + + intro = join_sections( + [ + ("Session path", compact(payload.get("source_path"))), + ("Conversation", compact(payload.get("conversation_id"))), + ( + "Date", + compact(payload.get("date_time")) + or compact(payload.get("date")), + ), + ] + ) + rendered_turns: list[str] = [] + for turn in turns: + if not isinstance(turn, dict): + continue + speaker = ( + compact(turn.get("speaker")) + or compact(turn.get("role")) + or compact(turn.get("author")) + or "speaker" + ) + text = ( + compact(turn.get("text")) + or compact(turn.get("content")) + or compact(turn.get("utterance")) + ) + if text: + rendered_turns.append(f"[{speaker}] {text}") + transcript = "\n".join(rendered_turns) + return "\n".join(part for part in [intro, transcript] if part) + + +def fallback_session_text(path: str, message: Any) -> str: + transcript_text = extract_transcript_text(message) + if transcript_text: + return transcript_text + + if isinstance(message, str): + return normalize_content(path or "/sessions/unknown.jsonl", message) + if isinstance(message, dict): + return normalize_content(path or "/sessions/unknown.jsonl", json.dumps(message, ensure_ascii=False)) + return "" + + +def build_memory_embedding_text(row: dict[str, Any], max_chars: int) -> str: + return truncate_text( + join_sections( + [ + ("Path", compact(row.get("path"))), + ("Filename", compact(row.get("filename"))), + ("Project", compact(row.get("project"))), + ("Description", compact(row.get("description"))), + ("Summary", compact(row.get("summary"))), + ] + ), + max_chars, + ) + + +def build_session_embedding_text(row: dict[str, Any], max_chars: int) -> str: + text = compact(row.get("text")) + turn_summary = compact(row.get("turn_summary")) + fallback = "" + if not text and not turn_summary: + fallback = compact(fallback_session_text(as_str(row.get("path")), row.get("message"))) + turn_index_value = row.get("turn_index") + turn_index = "" + if 
isinstance(turn_index_value, (int, float)) and int(turn_index_value) == turn_index_value: + turn_index = str(int(turn_index_value)) + return truncate_text( + join_sections( + [ + ("Path", compact(row.get("path"))), + ("Event", compact(row.get("event_type"))), + ("Speaker", compact(row.get("speaker"))), + ("Source time", compact(row.get("source_date_time"))), + ("Turn index", turn_index), + ("Text", text), + ("Turn summary", turn_summary), + ("Content", fallback), + ] + ), + max_chars, + ) + + +def stable_embedding_source_hash(text: str) -> str: + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def import_numpy(): + try: + import numpy as np + except ImportError as exc: + raise SystemExit( + "Missing dependency `numpy`. Install `scripts/requirements-harrier-embeddings.txt` first." + ) from exc + return np + + +def import_torch_and_transformers(): + try: + import torch + import torch.nn.functional as torch_f + from transformers import AutoModel, AutoTokenizer + except ImportError as exc: + raise SystemExit( + "Missing Python ML dependencies. Install `scripts/requirements-harrier-embeddings.txt` first." 
+ ) from exc + return torch, torch_f, AutoModel, AutoTokenizer + + +def resolve_device(torch_module: Any, requested: str) -> str: + if requested != "auto": + return requested + if getattr(torch_module.backends, "mps", None) and torch_module.backends.mps.is_available(): + return "mps" + if torch_module.cuda.is_available(): + return "cuda" + return "cpu" + + +def resolve_torch_dtype(torch_module: Any, requested: str, device: str) -> Any: + normalized = requested.lower() + if normalized == "auto": + if device == "mps": + return torch_module.float32 + return "auto" + aliases = { + "fp16": torch_module.float16, + "float16": torch_module.float16, + "half": torch_module.float16, + "fp32": torch_module.float32, + "float32": torch_module.float32, + "float": torch_module.float32, + "bf16": torch_module.bfloat16, + "bfloat16": torch_module.bfloat16, + } + if normalized not in aliases: + raise SystemExit(f"Unsupported --dtype value: {requested}") + return aliases[normalized] + + +class HarrierEmbedder: + def __init__( + self, + model_id: str, + device: str, + dtype: str, + max_length: int, + local_files_only: bool, + cache_dir: str | None, + ) -> None: + self.model_id = model_id + self.requested_device = device + self.requested_dtype = dtype + self.max_length = max_length + self.local_files_only = local_files_only + self.cache_dir = cache_dir + self._np = None + self._torch = None + self._torch_f = None + self._tokenizer = None + self._model = None + self.device = "cpu" + self.vector_dim = 0 + self.dtype_name = "auto" + + def load(self) -> None: + if self._model is not None and self._tokenizer is not None: + return + + if self.cache_dir: + os.environ.setdefault("HF_HOME", self.cache_dir) + os.environ.setdefault("TRANSFORMERS_CACHE", self.cache_dir) + + np = import_numpy() + torch, torch_f, AutoModel, AutoTokenizer = import_torch_and_transformers() + + device = resolve_device(torch, self.requested_device) + torch_dtype = resolve_torch_dtype(torch, self.requested_dtype, device) 
+ tokenizer = AutoTokenizer.from_pretrained( + self.model_id, + local_files_only=self.local_files_only, + ) + model_kwargs: dict[str, Any] = { + "local_files_only": self.local_files_only, + } + if torch_dtype == "auto": + model_kwargs["torch_dtype"] = "auto" + self.dtype_name = "auto" + else: + model_kwargs["torch_dtype"] = torch_dtype + self.dtype_name = str(torch_dtype).split(".")[-1] + model = AutoModel.from_pretrained(self.model_id, **model_kwargs) + model.eval() + model.to(device) + + self._np = np + self._torch = torch + self._torch_f = torch_f + self._tokenizer = tokenizer + self._model = model + self.device = device + self.vector_dim = int(getattr(model.config, "hidden_size")) + eprint( + f"[harrier] loaded {self.model_id} on {self.device} " + f"(dtype={self.dtype_name}, dim={self.vector_dim})" + ) + + def _last_token_pool(self, last_hidden_states: Any, attention_mask: Any) -> Any: + torch = self._torch + assert torch is not None + left_padding = bool((attention_mask[:, -1].sum() == attention_mask.shape[0]).item()) + if left_padding: + return last_hidden_states[:, -1] + sequence_lengths = attention_mask.sum(dim=1) - 1 + batch_size = last_hidden_states.shape[0] + indices = torch.arange(batch_size, device=last_hidden_states.device) + return last_hidden_states[indices, sequence_lengths] + + def embed_documents(self, texts: list[str]) -> Any: + if not texts: + np = import_numpy() + return np.zeros((0, self.vector_dim or 0), dtype=np.float32) + self.load() + assert self._tokenizer is not None + assert self._model is not None + assert self._torch is not None + assert self._torch_f is not None + assert self._np is not None + + batch = self._tokenizer( + texts, + max_length=self.max_length, + padding=True, + truncation=True, + return_tensors="pt", + ) + batch = {key: value.to(self.device) for key, value in batch.items()} + with self._torch.no_grad(): + outputs = self._model(**batch) + embeddings = self._last_token_pool(outputs.last_hidden_state, 
batch["attention_mask"]) + embeddings = self._torch_f.normalize(embeddings, p=2, dim=1) + output = embeddings.detach().to("cpu", dtype=self._torch.float32).numpy() + if not self._np.isfinite(output).all(): + raise RuntimeError( + f"Non-finite embeddings generated by {self.model_id} on " + f"device={self.device} dtype={self.dtype_name}. " + "Retry with --dtype fp32 or --device cpu." + ) + return output + + +def slugify(text: str) -> str: + return re.sub(r"[^A-Za-z0-9._-]+", "_", text.strip("/")) or "artifact" + + +def default_artifact_root(table: str, model_id: str) -> Path: + stamp = datetime.now().strftime("%Y%m%d-%H%M%S") + return Path("tmp") / "harrier-backfill" / f"{table}-{slugify(model_id)}-{stamp}" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Two-phase Harrier embedding backfill: generate local vectors.npy first, then upload to Deeplake." + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + def add_table_options(cmd: argparse.ArgumentParser) -> None: + cmd.add_argument("--table", choices=["memory", "sessions", "all"], default="all") + cmd.add_argument("--memory-table", default=None) + cmd.add_argument("--sessions-table", default=None) + cmd.add_argument("--artifact-dir", default=None) + cmd.add_argument("--embedding-column", default="embedding") + cmd.add_argument("--embedding-model-column", default="embedding_model") + cmd.add_argument("--embedding-source-hash-column", default="embedding_source_hash") + cmd.add_argument("--embedding-updated-at-column", default="embedding_updated_at") + + def add_embed_options(cmd: argparse.ArgumentParser) -> None: + add_table_options(cmd) + cmd.add_argument("--model-id", default=DEFAULT_MODEL_ID) + cmd.add_argument("--start-offset", type=int, default=0) + cmd.add_argument("--max-rows", type=int, default=None) + cmd.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE) + cmd.add_argument("--scan-batch-size", type=int, 
default=DEFAULT_SCAN_BATCH_SIZE) + cmd.add_argument("--memory-max-chars", type=int, default=8000) + cmd.add_argument("--sessions-max-chars", type=int, default=8000) + cmd.add_argument("--max-length", type=int, default=DEFAULT_MAX_LENGTH) + cmd.add_argument("--device", default="auto") + cmd.add_argument("--dtype", default="auto") + cmd.add_argument("--force", action="store_true") + cmd.add_argument("--local-files-only", action="store_true") + cmd.add_argument("--cache-dir", default=None) + cmd.add_argument("--resume", action="store_true") + cmd.add_argument("--rebuild-plan", action="store_true") + + def add_upload_options(cmd: argparse.ArgumentParser) -> None: + add_table_options(cmd) + cmd.add_argument("--upload-batch-size", type=int, default=32) + cmd.add_argument("--resume", action="store_true") + + embed_cmd = subparsers.add_parser("embed", help="Plan remaining rows and generate vectors.npy locally.") + add_embed_options(embed_cmd) + + upload_cmd = subparsers.add_parser("upload", help="Upload vectors from a completed artifact into Deeplake.") + add_upload_options(upload_cmd) + + run_cmd = subparsers.add_parser("run", help="Run embed, then upload after vectors.npy is complete.") + add_embed_options(run_cmd) + run_cmd.add_argument("--upload-batch-size", type=int, default=32) + + return parser.parse_args() + + +def table_name_for_kind(config: Config, args: argparse.Namespace, table_kind: str) -> str: + if table_kind == "memory": + return args.memory_table or config.memory_table + if table_kind == "sessions": + return args.sessions_table or config.sessions_table + raise ValueError(f"Unsupported table kind: {table_kind}") + + +def artifact_dir_for_table(args: argparse.Namespace, table_kind: str) -> Path: + if args.artifact_dir: + root = Path(args.artifact_dir) + else: + root = default_artifact_root(args.table, getattr(args, "model_id", DEFAULT_MODEL_ID)) + if args.table == "all": + return root / table_kind + return root + + +def manifest_paths(artifact_dir: Path) -> 
tuple[Path, Path, Path]: + return ( + artifact_dir / "manifest.json", + artifact_dir / "rows.jsonl", + artifact_dir / "vectors.npy", + ) + + +def remaining_scan_limit(args: argparse.Namespace, offset: int) -> int: + if args.max_rows is None: + return args.scan_batch_size + remaining = max(0, (args.start_offset + args.max_rows) - offset) + return min(args.scan_batch_size, remaining) + + +def fetch_memory_rows(api: DeeplakeApi, args: argparse.Namespace, table_name: str, offset: int) -> list[dict[str, Any]]: + limit = remaining_scan_limit(args, offset) + if limit <= 0: + return [] + table = sql_ident(table_name) + return api.query( + "SELECT id, path, filename, summary, description, project, " + f'"{sql_ident(args.embedding_source_hash_column)}" AS embedding_source_hash, ' + f'"{sql_ident(args.embedding_model_column)}" AS embedding_model ' + f'FROM "{table}" ORDER BY path ASC LIMIT {limit} OFFSET {offset}' + ) + + +def fetch_session_rows( + api: DeeplakeApi, + args: argparse.Namespace, + table_name: str, + offset: int, + include_metadata: bool, +) -> list[dict[str, Any]]: + limit = remaining_scan_limit(args, offset) + if limit <= 0: + return [] + table = sql_ident(table_name) + select_columns = [ + "id", + "path", + "event_type", + "speaker", + "text", + "turn_summary", + "source_date_time", + "turn_index", + "message", + ] + if include_metadata: + select_columns.extend( + [ + f'"{sql_ident(args.embedding_source_hash_column)}" AS embedding_source_hash', + f'"{sql_ident(args.embedding_model_column)}" AS embedding_model', + ] + ) + return api.query( + f'SELECT {", ".join(select_columns)} ' + f'FROM "{table}" ' + f"ORDER BY path ASC, turn_index ASC, creation_date ASC LIMIT {limit} OFFSET {offset}" + ) + + +def plan_artifact( + api: DeeplakeApi, + config: Config, + args: argparse.Namespace, + table_kind: str, + artifact_dir: Path, +) -> dict[str, Any]: + artifact_dir.mkdir(parents=True, exist_ok=True) + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) 
+ table_name = table_name_for_kind(config, args, table_kind) + planned_rows: list[dict[str, Any]] = [] + scanned_rows = 0 + skipped_empty = 0 + skipped_existing = 0 + metadata_supported = True + used_metadata = table_kind == "memory" + + offset = max(0, args.start_offset) + while True: + if table_kind == "memory": + rows = fetch_memory_rows(api, args, table_name, offset) + else: + try: + rows = fetch_session_rows(api, args, table_name, offset, include_metadata=metadata_supported) + used_metadata = metadata_supported + except DeeplakeQueryError: + if metadata_supported: + metadata_supported = False + eprint("[sessions] metadata scan failed; falling back to scans without existing-hash checks") + rows = fetch_session_rows(api, args, table_name, offset, include_metadata=False) + used_metadata = False + else: + raise + + if not rows: + break + + scanned_rows += len(rows) + for row in rows: + text = ( + build_memory_embedding_text(row, args.memory_max_chars) + if table_kind == "memory" + else build_session_embedding_text(row, args.sessions_max_chars) + ) + if not text: + skipped_empty += 1 + continue + source_hash = stable_embedding_source_hash(text) + existing_hash = compact(row.get("embedding_source_hash")) + existing_model = compact(row.get("embedding_model")) + if not args.force and used_metadata and existing_hash == source_hash and existing_model == args.model_id: + skipped_existing += 1 + continue + planned_rows.append( + { + "id": as_str(row.get("id")), + "path": as_str(row.get("path")), + "source_hash": source_hash, + "text": text, + } + ) + eprint( + f"[{table_kind}] planned {len(planned_rows)} rows after scanning {scanned_rows} rows " + f"(skipped_empty={skipped_empty}, skipped_existing={skipped_existing})" + ) + offset += args.scan_batch_size + + manifest = { + "schema_version": ARTIFACT_SCHEMA_VERSION, + "table_kind": table_kind, + "table_name": table_name, + "model_id": args.model_id, + "artifact_created_at": now_iso(), + "artifact_updated_at": now_iso(), + 
"rows_file": rows_path.name, + "vectors_file": vectors_path.name, + "embedding_column": args.embedding_column, + "embedding_model_column": args.embedding_model_column, + "embedding_source_hash_column": args.embedding_source_hash_column, + "embedding_updated_at_column": args.embedding_updated_at_column, + "start_offset": args.start_offset, + "max_rows": args.max_rows, + "planned_rows": len(planned_rows), + "scanned_rows": scanned_rows, + "skipped_empty": skipped_empty, + "skipped_existing": skipped_existing, + "skip_existing_supported": bool(used_metadata), + "completed_vectors": 0, + "completed_uploads": 0, + "vector_dim": None, + "status": "planned", + "upload_complete": False, + } + write_jsonl(rows_path, planned_rows) + write_json_atomic(manifest_path, manifest) + return manifest + + +def load_or_plan_artifact( + api: DeeplakeApi, + config: Config, + args: argparse.Namespace, + table_kind: str, + artifact_dir: Path, +) -> tuple[dict[str, Any], Path, Path, Path]: + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + if manifest_path.exists() and rows_path.exists() and not args.rebuild_plan: + manifest = load_json(manifest_path) + if manifest.get("table_kind") != table_kind: + raise SystemExit(f"Artifact at {artifact_dir} is for {manifest.get('table_kind')}, not {table_kind}") + if not args.resume and manifest.get("status") in {"embedding", "complete"}: + raise SystemExit( + f"Artifact already exists at {artifact_dir}. Use --resume or --rebuild-plan." 
+ ) + return manifest, manifest_path, rows_path, vectors_path + + manifest = plan_artifact(api, config, args, table_kind, artifact_dir) + return manifest, manifest_path, rows_path, vectors_path + + +def embed_artifact(args: argparse.Namespace, manifest: dict[str, Any], manifest_path: Path, rows_path: Path, vectors_path: Path) -> dict[str, Any]: + np = import_numpy() + records = read_jsonl(rows_path) + total_rows = len(records) + completed_vectors = int(manifest.get("completed_vectors") or 0) + if completed_vectors > total_rows: + raise SystemExit("Manifest completed_vectors exceeds rows.jsonl length") + + if total_rows == 0: + if not vectors_path.exists(): + np.save(vectors_path, np.zeros((0, 0), dtype=np.float32)) + manifest["status"] = "complete" + manifest["vector_dim"] = 0 + manifest["completed_vectors"] = 0 + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] nothing to embed") + return manifest + + embedder = HarrierEmbedder( + model_id=manifest["model_id"], + device=args.device, + dtype=args.dtype, + max_length=args.max_length, + local_files_only=args.local_files_only, + cache_dir=args.cache_dir, + ) + embedder.load() + vector_dim = embedder.vector_dim + + if vectors_path.exists(): + vectors = np.load(vectors_path, mmap_mode="r+") + if tuple(vectors.shape) != (total_rows, vector_dim): + raise SystemExit( + f"Existing vectors.npy shape {tuple(vectors.shape)} does not match planned shape {(total_rows, vector_dim)}" + ) + else: + vectors = np.lib.format.open_memmap( + vectors_path, + mode="w+", + dtype=np.float32, + shape=(total_rows, vector_dim), + ) + + if completed_vectors == 0: + manifest["status"] = "embedding" + manifest["vector_dim"] = vector_dim + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + + for start in range(completed_vectors, total_rows, args.batch_size): + end = min(total_rows, start + args.batch_size) + batch_records = 
records[start:end] + batch_vectors = embedder.embed_documents([record["text"] for record in batch_records]) + vectors[start:end] = batch_vectors + if hasattr(vectors, "flush"): + vectors.flush() + manifest["completed_vectors"] = end + manifest["vector_dim"] = vector_dim + manifest["status"] = "embedding" if end < total_rows else "complete" + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] embedded {end}/{total_rows}") + + manifest["status"] = "complete" + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + return manifest + + +def update_embedding_row( + api: DeeplakeApi, + manifest: dict[str, Any], + row_id: str, + vector: list[float], + source_hash: str, +) -> None: + table = sql_ident(manifest["table_name"]) + updated_at = now_iso() + api.query( + f'UPDATE "{table}" SET ' + f'"{sql_ident(manifest["embedding_column"])}" = {sql_float4_array(vector)}, ' + f'"{sql_ident(manifest["embedding_model_column"])}" = \'{sql_str(manifest["model_id"])}\', ' + f'"{sql_ident(manifest["embedding_source_hash_column"])}" = \'{sql_str(source_hash)}\', ' + f'"{sql_ident(manifest["embedding_updated_at_column"])}" = \'{sql_str(updated_at)}\' ' + f"WHERE id = '{sql_str(row_id)}'" + ) + + +def update_embedding_rows_batch( + api: DeeplakeApi, + manifest: dict[str, Any], + rows: list[tuple[str, list[float], str]], +) -> None: + if not rows: + return + table = sql_ident(manifest["table_name"]) + updated_at = now_iso() + values_sql = ", ".join( + ( + f"('{sql_str(row_id)}', {sql_float4_array(vector)}, '{sql_str(source_hash)}')" + ) + for row_id, vector, source_hash in rows + ) + api.query( + f'UPDATE "{table}" AS target SET ' + f'"{sql_ident(manifest["embedding_column"])}" = source.embedding, ' + f'"{sql_ident(manifest["embedding_model_column"])}" = \'{sql_str(manifest["model_id"])}\', ' + f'"{sql_ident(manifest["embedding_source_hash_column"])}" = source.source_hash, ' + 
f'"{sql_ident(manifest["embedding_updated_at_column"])}" = \'{sql_str(updated_at)}\' ' + f"FROM (VALUES {values_sql}) AS source(id, embedding, source_hash) " + f"WHERE target.id = source.id" + ) + + +def upload_artifact( + api: DeeplakeApi, + args: argparse.Namespace, + manifest: dict[str, Any], + manifest_path: Path, + rows_path: Path, + vectors_path: Path, +) -> dict[str, Any]: + np = import_numpy() + records = read_jsonl(rows_path) + total_rows = len(records) + if int(manifest.get("completed_vectors") or 0) < total_rows: + raise SystemExit( + f"Artifact {manifest_path.parent} is incomplete: embedded " + f"{manifest.get('completed_vectors', 0)}/{total_rows} rows." + ) + + ensure_sql_columns( + api, + manifest["table_name"], + [ + (manifest["embedding_column"], "float4[]"), + (manifest["embedding_model_column"], "TEXT NOT NULL DEFAULT ''"), + (manifest["embedding_source_hash_column"], "TEXT NOT NULL DEFAULT ''"), + (manifest["embedding_updated_at_column"], "TEXT NOT NULL DEFAULT ''"), + ], + ) + + vectors = np.load(vectors_path, mmap_mode="r") + if len(vectors) != total_rows: + raise SystemExit( + f"vectors.npy row count {len(vectors)} does not match rows.jsonl count {total_rows}" + ) + + completed_uploads = int(manifest.get("completed_uploads") or 0) + if completed_uploads > total_rows: + raise SystemExit("Manifest completed_uploads exceeds rows.jsonl length") + if completed_uploads and not args.resume: + raise SystemExit( + f"Upload already started for {manifest_path.parent}. Use --resume to continue." + ) + + for start in range(completed_uploads, total_rows, args.upload_batch_size): + end = min(total_rows, start + args.upload_batch_size) + batch_rows: list[tuple[str, list[float], str]] = [] + for index in range(start, end): + record = records[index] + vector = vectors[index].astype("float32") + if not np.isfinite(vector).all(): + raise SystemExit( + f"Artifact contains non-finite values at row {index} " + f"(id={record['id']}). 
Regenerate vectors before uploading." + ) + batch_rows.append( + ( + record["id"], + vector.tolist(), + record["source_hash"], + ) + ) + update_embedding_rows_batch(api, manifest, batch_rows) + manifest["completed_uploads"] = end + manifest["upload_complete"] = end >= total_rows + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + eprint(f"[{manifest['table_kind']}] uploaded {end}/{total_rows}") + + ensure_embedding_index(api, manifest["table_name"], manifest["embedding_column"]) + manifest["upload_complete"] = True + manifest["artifact_updated_at"] = now_iso() + write_json_atomic(manifest_path, manifest) + return manifest + + +def table_kinds(args: argparse.Namespace) -> list[str]: + if args.table == "all": + return ["memory", "sessions"] + return [args.table] + + +def run_embed_command(api: DeeplakeApi, config: Config, args: argparse.Namespace) -> list[Path]: + artifact_dirs: list[Path] = [] + for table_kind in table_kinds(args): + artifact_dir = artifact_dir_for_table(args, table_kind) + manifest, manifest_path, rows_path, vectors_path = load_or_plan_artifact( + api, + config, + args, + table_kind, + artifact_dir, + ) + embed_artifact(args, manifest, manifest_path, rows_path, vectors_path) + artifact_dirs.append(artifact_dir) + eprint(f"[{table_kind}] artifact ready at {artifact_dir}") + return artifact_dirs + + +def run_upload_command(api: DeeplakeApi, args: argparse.Namespace) -> None: + if not args.artifact_dir: + raise SystemExit("--artifact-dir is required for upload") + for table_kind in table_kinds(args): + artifact_dir = artifact_dir_for_table(args, table_kind) + manifest_path, rows_path, vectors_path = manifest_paths(artifact_dir) + if not manifest_path.exists() or not rows_path.exists() or not vectors_path.exists(): + raise SystemExit(f"Incomplete artifact directory: {artifact_dir}") + manifest = load_json(manifest_path) + upload_artifact(api, args, manifest, manifest_path, rows_path, vectors_path) + 
eprint(f"[{table_kind}] upload complete from {artifact_dir}") + + +def main() -> int: + args = parse_args() + config = load_config() + api = DeeplakeApi( + token=config.token, + api_url=config.api_url, + org_id=config.org_id, + workspace_id=config.workspace_id, + ) + + if args.command == "embed": + run_embed_command(api, config, args) + return 0 + if args.command == "upload": + run_upload_command(api, args) + return 0 + if args.command == "run": + run_embed_command(api, config, args) + upload_args = argparse.Namespace(**vars(args)) + upload_args.resume = True + run_upload_command(api, upload_args) + return 0 + raise SystemExit(f"Unsupported command: {args.command}") + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/requirements-harrier-embeddings.txt b/scripts/requirements-harrier-embeddings.txt new file mode 100644 index 0000000..086ebf4 --- /dev/null +++ b/scripts/requirements-harrier-embeddings.txt @@ -0,0 +1,4 @@ +numpy>=1.26 +safetensors>=0.4 +torch>=2.4 +transformers>=4.57 diff --git a/src/config.ts b/src/config.ts index af884b6..d6ceaa0 100644 --- a/src/config.ts +++ b/src/config.ts @@ -11,6 +11,11 @@ export interface Config { apiUrl: string; tableName: string; sessionsTableName: string; + graphNodesTableName: string; + graphEdgesTableName: string; + factsTableName: string; + entitiesTableName: string; + factEntityLinksTableName: string; memoryPath: string; } @@ -54,6 +59,11 @@ export function loadConfig(): Config | null { apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env.HIVEMIND_FACTS_TABLE ?? 
env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory"), }; } diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index e3a72ec..27006c8 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -9,7 +9,7 @@ const log = (msg: string) => _log("sdk", msg); const TRACE_SQL = (process.env.HIVEMIND_TRACE_SQL ?? process.env.DEEPLAKE_TRACE_SQL) === "1" || (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; const DEBUG_FILE_LOG = (process.env.HIVEMIND_DEBUG ?? process.env.DEEPLAKE_DEBUG) === "1"; -function summarizeSql(sql: string, maxLen = 220): string { +export function summarizeSql(sql: string, maxLen = 220): string { const compact = sql.replace(/\s+/g, " ").trim(); return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; } @@ -20,6 +20,29 @@ function traceSql(msg: string): void { if (DEBUG_FILE_LOG) log(msg); } +export class DeeplakeQueryError extends Error { + readonly sqlSummary: string; + readonly status?: number; + readonly responseBody?: string; + readonly sql?: string; + override cause?: unknown; + + constructor(message: string, args: { + sql?: string; + status?: number; + responseBody?: string; + cause?: unknown; + } = {}) { + super(message); + this.name = "DeeplakeQueryError"; + this.sql = args.sql; + this.sqlSummary = args.sql ? 
summarizeSql(args.sql) : ""; + this.status = args.status; + this.responseBody = args.responseBody; + this.cause = args.cause; + } +} + // ── Retry & concurrency primitives ────────────────────────────────────────── const RETRYABLE_CODES = new Set([429, 500, 502, 503, 504]); @@ -147,10 +170,12 @@ export class DeeplakeApi { } catch (e: unknown) { // Network-level failure (DNS, TCP reset, timeout, etc.) if (isTimeoutError(e)) { - lastError = new Error(`Query timeout after ${QUERY_TIMEOUT_MS}ms`); + lastError = new DeeplakeQueryError(`Query timeout after ${QUERY_TIMEOUT_MS}ms`, { sql, cause: e }); throw lastError; } - lastError = e instanceof Error ? e : new Error(String(e)); + lastError = e instanceof Error + ? new DeeplakeQueryError(e.message, { sql, cause: e }) + : new DeeplakeQueryError(String(e), { sql, cause: e }); if (attempt < MAX_RETRIES) { const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; log(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); @@ -176,9 +201,13 @@ export class DeeplakeApi { await sleep(delay); continue; } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + throw new DeeplakeQueryError(`Query failed: ${resp.status}: ${text.slice(0, 200)}`, { + sql, + status: resp.status, + responseBody: text.slice(0, 4000), + }); } - throw lastError ?? new Error("Query failed: max retries exceeded"); + throw lastError ?? 
new DeeplakeQueryError("Query failed: max retries exceeded", { sql }); } // ── Writes ────────────────────────────────────────────────────────────────── @@ -450,4 +479,193 @@ export class DeeplakeApi { } await this.ensureLookupIndex(name, "path_creation_date_turn_index", `("path", "creation_date", "turn_index")`); } + + async ensureGraphNodesTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `node_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `node_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`], + ] as const) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Best effort for older backends. 
+ } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "node_id", `("node_id")`); + } + + async ensureGraphEdgesTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `edge_id TEXT NOT NULL DEFAULT ''`, + `source_node_id TEXT NOT NULL DEFAULT ''`, + `target_node_id TEXT NOT NULL DEFAULT ''`, + `relation TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + for (const [column, ddl] of [ + ["source_session_ids", `TEXT NOT NULL DEFAULT ''`], + ["source_paths", `TEXT NOT NULL DEFAULT ''`], + ] as const) { + try { + await this.query(`ALTER TABLE "${name}" ADD COLUMN IF NOT EXISTS "${column}" ${ddl}`); + } catch { + // Best effort for older backends. 
+ } + } + await this.ensureLookupIndex(name, "source_session_id", `("source_session_id")`); + await this.ensureLookupIndex(name, "source_target_relation", `("source_node_id", "target_node_id", "relation")`); + } + + async ensureFactsTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `subject_entity_id TEXT NOT NULL DEFAULT ''`, + `subject_name TEXT NOT NULL DEFAULT ''`, + `subject_type TEXT NOT NULL DEFAULT ''`, + `predicate TEXT NOT NULL DEFAULT ''`, + `object_entity_id TEXT NOT NULL DEFAULT ''`, + `object_name TEXT NOT NULL DEFAULT ''`, + `object_type TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `evidence TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `confidence TEXT NOT NULL DEFAULT ''`, + `valid_at TEXT NOT NULL DEFAULT ''`, + `valid_from TEXT NOT NULL DEFAULT ''`, + `valid_to TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "session_predicate", `("source_session_id", "predicate")`); + await this.ensureLookupIndex(name, "subject_object", `("subject_entity_id", "object_entity_id")`); + } + + async ensureEntitiesTable(name: 
string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `canonical_name TEXT NOT NULL DEFAULT ''`, + `entity_type TEXT NOT NULL DEFAULT ''`, + `aliases TEXT NOT NULL DEFAULT ''`, + `summary TEXT NOT NULL DEFAULT ''`, + `search_text TEXT NOT NULL DEFAULT ''`, + `source_session_ids TEXT NOT NULL DEFAULT ''`, + `source_paths TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT ''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "canonical_name", `("canonical_name")`); + } + + async ensureFactEntityLinksTable(name: string): Promise { + const columns = [ + `id TEXT NOT NULL DEFAULT ''`, + `path TEXT NOT NULL DEFAULT ''`, + `filename TEXT NOT NULL DEFAULT ''`, + `link_id TEXT NOT NULL DEFAULT ''`, + `fact_id TEXT NOT NULL DEFAULT ''`, + `entity_id TEXT NOT NULL DEFAULT ''`, + `entity_role TEXT NOT NULL DEFAULT ''`, + `source_session_id TEXT NOT NULL DEFAULT ''`, + `source_path TEXT NOT NULL DEFAULT ''`, + `author TEXT NOT NULL DEFAULT ''`, + `mime_type TEXT NOT NULL DEFAULT 'application/json'`, + `size_bytes BIGINT NOT NULL DEFAULT 0`, + `project TEXT NOT NULL DEFAULT ''`, + `description TEXT NOT NULL DEFAULT ''`, + `agent TEXT NOT NULL DEFAULT ''`, + `creation_date TEXT NOT NULL DEFAULT ''`, + `last_update_date TEXT NOT NULL DEFAULT 
''`, + ]; + const tables = await this.listTables(); + if (!tables.includes(name)) { + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (${columns.join(", ")}) USING deeplake`); + if (!tables.includes(name)) this._tablesCache = [...tables, name]; + } + await this.ensureLookupIndex(name, "fact_id", `("fact_id")`); + await this.ensureLookupIndex(name, "entity_id", `("entity_id")`); + await this.ensureLookupIndex(name, "session_entity_role", `("source_session_id", "entity_id", "entity_role")`); + } } diff --git a/src/embeddings/harrier.ts b/src/embeddings/harrier.ts new file mode 100644 index 0000000..b78174d --- /dev/null +++ b/src/embeddings/harrier.ts @@ -0,0 +1,166 @@ +import { + AutoModel, + AutoTokenizer, + LogLevel, + env, +} from "@huggingface/transformers"; + +const DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +const DEFAULT_DOCUMENT_BATCH_SIZE = 8; +const DEFAULT_MAX_LENGTH = 32_768; + +export interface HarrierEmbedderOptions { + modelId?: string; + cacheDir?: string; + localModelPath?: string; + localFilesOnly?: boolean; + device?: string; + dtype?: string; + maxLength?: number; + batchSize?: number; +} + +export interface HarrierQueryOptions { + task?: string; +} + +type TokenizerLike = Awaited>; +type ModelLike = Awaited>; + +function toNumber(value: unknown): number { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} + +function tensorToRows(tensor: { data: ArrayLike; dims: number[] }): number[][] { + const [batchSize, width] = tensor.dims; + const rows: number[][] = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row: number[] = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 
0)); + } + rows.push(row); + } + return rows; +} + +function l2Normalize(rows: number[][]): number[][] { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} + +function lastTokenPool(outputs: { data: ArrayLike; dims: number[] }, attentionMask: { data: ArrayLike; dims: number[] }): number[][] { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows: number[][] = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = (batchIndex * sequenceLength) + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + + const row: number[] = []; + const hiddenOffset = ((batchIndex * sequenceLength) + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + + return rows; +} + +function formatQuery(task: string, query: string): string { + return `Instruct: ${task}\nQuery: ${query}`; +} + +export class HarrierEmbedder { + readonly modelId: string; + private tokenizerPromise: Promise | null = null; + private modelPromise: Promise | null = null; + private readonly options: Required> & HarrierEmbedderOptions; + + constructor(options: HarrierEmbedderOptions = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? 
DEFAULT_DOCUMENT_BATCH_SIZE, + }; + if (options.cacheDir) env.cacheDir = options.cacheDir; + if (options.localModelPath) env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + + async embedDocuments(texts: string[]): Promise { + return this.embedInternal(texts); + } + + async embedQueries(texts: string[], options: HarrierQueryOptions = {}): Promise { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + + private async load(): Promise<{ tokenizer: TokenizerLike; model: ModelLike }> { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: (this.options.device ?? "cpu") as any, + dtype: this.options.dtype as any, + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + + private async embedInternal(texts: string[]): Promise { + if (texts.length === 0) return []; + const { tokenizer, model } = await this.load(); + const rows: number[][] = []; + + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength, + }) as Record; + const outputs = await model(inputs); + const sentenceEmbedding = (outputs as Record)["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding as { data: ArrayLike; dims: number[] }))); + continue; + } + + const lastHiddenState = (outputs as 
Record)["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize( + lastTokenPool( + lastHiddenState as { data: ArrayLike; dims: number[] }, + attentionMask as { data: ArrayLike; dims: number[] }, + ), + )); + } + + return rows; + } +} diff --git a/src/embeddings/text.ts b/src/embeddings/text.ts new file mode 100644 index 0000000..880ec2f --- /dev/null +++ b/src/embeddings/text.ts @@ -0,0 +1,138 @@ +import { createHash } from "node:crypto"; +import { normalizeContent } from "../shell/grep-core.js"; + +export interface MemoryEmbeddingRow { + path?: string; + filename?: string; + summary?: string; + description?: string; + project?: string; +} + +export interface SessionEmbeddingRow { + path?: string; + event_type?: string; + speaker?: string; + text?: string; + turn_summary?: string; + source_date_time?: string; + turn_index?: number; + message?: unknown; +} + +function compact(value: unknown): string { + if (typeof value !== "string") return ""; + return value.trim(); +} + +function joinSections(sections: Array<[label: string, value: string]>): string { + return sections + .filter(([, value]) => value.length > 0) + .map(([label, value]) => `${label}: ${value}`) + .join("\n"); +} + +function truncateText(text: string, maxChars: number): string { + const normalized = text.trim(); + if (normalized.length <= maxChars) return normalized; + return `${normalized.slice(0, maxChars).trimEnd()}\n[truncated ${normalized.length - maxChars} chars]`; +} + +function tryParseObject(value: unknown): Record | null { + if (!value) return null; + if (typeof value === "string") { + try { + const parsed = JSON.parse(value) as unknown; + return parsed && typeof parsed === "object" ? 
parsed as Record : null; + } catch { + return null; + } + } + return typeof value === "object" ? value as Record : null; +} + +function extractTranscriptText(message: unknown): string { + const payload = tryParseObject(message); + if (!payload) return ""; + const turns = Array.isArray(payload["turns"]) + ? payload["turns"] as Array> + : Array.isArray(payload["dialogue"]) + ? payload["dialogue"] as Array> + : null; + if (!turns || turns.length === 0) return ""; + + const intro = joinSections([ + ["Session path", compact(typeof payload["source_path"] === "string" ? payload["source_path"] : "")], + ["Conversation", compact(typeof payload["conversation_id"] === "string" ? payload["conversation_id"] : "")], + ["Date", compact(typeof payload["date_time"] === "string" ? payload["date_time"] : typeof payload["date"] === "string" ? payload["date"] : "")], + ]); + const transcript = turns + .map((turn) => { + const speaker = compact( + typeof turn["speaker"] === "string" + ? turn["speaker"] + : typeof turn["role"] === "string" + ? turn["role"] + : typeof turn["author"] === "string" + ? turn["author"] + : "", + ) || "speaker"; + const text = compact( + typeof turn["text"] === "string" + ? turn["text"] + : typeof turn["content"] === "string" + ? turn["content"] + : typeof turn["utterance"] === "string" + ? turn["utterance"] + : "", + ); + return text ? `[${speaker}] ${text}` : ""; + }) + .filter(Boolean) + .join("\n"); + + return [intro, transcript].filter(Boolean).join("\n"); +} + +function fallbackSessionText(row: SessionEmbeddingRow): string { + const transcriptText = extractTranscriptText(row.message); + if (transcriptText) return transcriptText; + + if (typeof row.message === "string") { + return normalizeContent(row.path ?? "/sessions/unknown.jsonl", row.message); + } + if (row.message && typeof row.message === "object") { + return normalizeContent(row.path ?? 
"/sessions/unknown.jsonl", JSON.stringify(row.message)); + } + return ""; +} + +export function buildMemoryEmbeddingText(row: MemoryEmbeddingRow, maxChars = 8_000): string { + return truncateText(joinSections([ + ["Path", compact(row.path)], + ["Filename", compact(row.filename)], + ["Project", compact(row.project)], + ["Description", compact(row.description)], + ["Summary", compact(row.summary)], + ]), maxChars); +} + +export function buildSessionEmbeddingText(row: SessionEmbeddingRow, maxChars = 8_000): string { + const text = compact(row.text); + const turnSummary = compact(row.turn_summary); + const fallback = (!text && !turnSummary) ? compact(fallbackSessionText(row)) : ""; + return truncateText(joinSections([ + ["Path", compact(row.path)], + ["Event", compact(row.event_type)], + ["Speaker", compact(row.speaker)], + ["Source time", compact(row.source_date_time)], + ["Turn index", Number.isFinite(row.turn_index) ? String(row.turn_index) : ""], + ["Text", text], + ["Turn summary", turnSummary], + ["Content", fallback], + ]), maxChars); +} + +export function stableEmbeddingSourceHash(text: string): string { + return createHash("sha256").update(text).digest("hex"); +} diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index afa51c1..0792966 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -1,5 +1,5 @@ import type { DeeplakeApi } from "../deeplake-api.js"; -import { sqlLike } from "../utils/sql.js"; +import { sqlLike, sqlStr } from "../utils/sql.js"; import { type GrepParams, handleGrepDirect, parseBashGrep } from "./grep-direct.js"; import { normalizeContent, refineGrepMatches } from "../shell/grep-core.js"; import { @@ -270,6 +270,23 @@ function normalizeSqlRef(ref: string): string { return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); } +const INTERCEPTED_SQL_REFS = new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + 
"fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links", +]); + function extractSqlTableRefs(query: string): string[] { const refs: string[] = []; const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; @@ -280,24 +297,23 @@ function extractSqlTableRefs(query: string): string[] { } export function queryReferencesInterceptedTables(query: string): boolean { - return extractSqlTableRefs(query).some((ref) => - ref === "memory" || - ref === "sessions" || - ref === "hivemind.memory" || - ref === "hivemind.sessions"); + return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); } export function queryUsesOnlyInterceptedTables(query: string): boolean { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => - ref === "memory" || - ref === "sessions" || - ref === "hivemind.memory" || - ref === "hivemind.sessions"); + return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); } export function queryUsesBareMemoryTables(query: string): boolean { - return extractSqlTableRefs(query).some((ref) => ref === "memory" || ref === "sessions"); + return extractSqlTableRefs(query).some((ref) => + ref === "memory" || + ref === "sessions" || + ref === "graph_nodes" || + ref === "graph_edges" || + ref === "memory_facts" || + ref === "memory_entities" || + ref === "fact_entity_links"); } function parsePsqlSegment(pipeline: string[], tokens: string[]): CompiledSegment | null { @@ -343,33 +359,88 @@ function parsePsqlSegment(pipeline: string[], tokens: string[]): CompiledSegment return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; } -function normalizePsqlQuery(query: string, memoryTable: string, sessionsTable: string): string { +function normalizePsqlQuery( + query: string, + memoryTable: 
string, + sessionsTable: string, + graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", + graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", + factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", + entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", + factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links", +): string { let sql = query.trim().replace(/;+\s*$/, ""); sql = sql .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) .replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`) .replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`) + .replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + .replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) + .replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`) + .replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`) + .replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) + .replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) + .replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) + .replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`) - .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`); + .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + 
.replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); return sql; } -function validatePsqlQuery(query: string, memoryTable: string, sessionsTable: string): string { +function validatePsqlQuery( + query: string, + memoryTable: string, + sessionsTable: string, + graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", + graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", + factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", + entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", + factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? 
"fact_entity_links", +): string { if (!queryUsesOnlyInterceptedTables(query)) { - throw new Error("psql queries must reference only memory, sessions, hivemind.memory, or hivemind.sessions"); + throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } - const sql = normalizePsqlQuery(query, memoryTable, sessionsTable); + const sql = normalizePsqlQuery( + query, + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + factsTable, + entitiesTable, + factEntityLinksTable, + ); const compact = sql.replace(/\s+/g, " ").trim(); if (!/^(select|with)\b/i.test(compact)) { throw new Error("psql mode only supports SELECT queries"); } - const allowedTables = new Set([memoryTable, sessionsTable]); + const allowedTables = new Set([ + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + factsTable, + entitiesTable, + factEntityLinksTable, + ]); const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { - throw new Error("psql query must reference memory or sessions"); + throw new Error("psql query must reference an intercepted hivemind memory table"); } for (const match of tableMatches) { if (!allowedTables.has(match[1])) { @@ -379,6 +450,234 @@ function validatePsqlQuery(query: string, memoryTable: string, sessionsTable: st return sql; } +function decodeSqlLiteral(value: string): string { + return value.replace(/''/g, "'").trim(); +} + +function cleanSearchTerm(value: string): string { + return decodeSqlLiteral(value) + .replace(/^%+|%+$/g, "") + .replace(/^_+|_+$/g, "") + .trim(); +} + +function extractSqlSearchTerms(query: string): string[] { + const terms: string[] = []; + const push = (value: string) => { + const cleaned = cleanSearchTerm(value); + if (!cleaned) return; + if (cleaned.startsWith("/")) return; + if (/^\/summaries\/|^\/sessions\//.test(cleaned)) 
return; + if (!terms.includes(cleaned)) terms.push(cleaned); + }; + + for (const match of query.matchAll(/\b(?:i?like|=)\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + for (const match of query.matchAll(/<\#>\s+E?'((?:[^']|'')*)'/gi)) { + push(match[1] ?? ""); + } + return terms; +} + +function chooseEntityTerms(terms: string[]): string[] { + const entityLike = terms.filter((term) => + /[A-Z]/.test(term) && + !/^\d+$/.test(term) && + term.split(/\s+/).length <= 4 + ); + return (entityLike.length > 0 ? entityLike : terms).slice(0, 2); +} + +interface GraphCandidateRow extends VirtualRow { + source_session_id?: string; + source_path?: string; + search_text?: string; +} + +function escapeRegex(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +async function fetchGraphCandidates( + api: DeeplakeApi, + graphNodesTable: string, + graphEdgesTable: string, + terms: string[], +): Promise<{ sessionId: string; sourcePath: string }[]> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return []; + + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const phrase = sqlStr(filteredTerms.join(" ")); + const nodeEntityClauses = entityTerms.map((term) => + `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')` + ); + const nodeTextClauses = topicTerms.map((term) => + `search_text ILIKE '%${sqlLike(term)}%'` + ); + const edgeEntityClauses = entityTerms.map((term) => + `search_text ILIKE '%${sqlLike(term)}%'` + ); + const edgeTopicClauses = topicTerms.map((term) => + `(relation ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR evidence ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')` + ); + const nodeWhere = entityTerms.length > 0 && topicTerms.length > 0 + ? 
`(${nodeEntityClauses.join(" OR ")}) AND (${nodeTextClauses.join(" OR ")})` + : entityTerms.length > 0 + ? `(${nodeEntityClauses.join(" OR ")})` + : topicTerms.length > 0 + ? `(${nodeTextClauses.join(" OR ")})` + : "FALSE"; + const edgeWhere = entityTerms.length > 0 && topicTerms.length > 0 + ? `(${edgeEntityClauses.join(" OR ")}) AND (${edgeTopicClauses.join(" OR ")})` + : topicTerms.length > 0 + ? `(${edgeTopicClauses.join(" OR ")})` + : entityTerms.length > 0 + ? `(${edgeEntityClauses.join(" OR ")})` + : "FALSE"; + + const sql = + `WITH node_candidates AS (` + + ` SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${graphNodesTable}"` + + ` WHERE ${nodeWhere}` + + ` ORDER BY score DESC LIMIT 8` + + `), edge_candidates AS (` + + ` SELECT source_session_id, source_path, search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${graphEdgesTable}"` + + ` WHERE ${edgeWhere}` + + ` ORDER BY score DESC LIMIT 8` + + `)` + + ` SELECT source_session_id, source_path, search_text, score` + + ` FROM (` + + ` SELECT source_session_id, source_path, search_text, score FROM node_candidates` + + ` UNION ALL` + + ` SELECT source_session_id, source_path, search_text, score FROM edge_candidates` + + ` ) AS graph_candidates` + + ` ORDER BY score ASC` + + ` LIMIT 12`; + + const rows = await api.query(sql) as GraphCandidateRow[]; + const expanded: Array<{ sessionId: string; sourcePath: string }> = []; + const seen = new Set(); + for (const row of rows) { + const searchText = typeof row["search_text"] === "string" ? row["search_text"] : ""; + const sessionIds = [ + ...(searchText.match(/conv_\d+_session_\d+/g) ?? []), + typeof row["source_session_id"] === "string" ? row["source_session_id"] : "", + ].map((value) => value.trim()).filter(Boolean); + const sourcePaths = [ + ...(searchText.match(/\/sessions\/conv_\d+_session_\d+\.json/g) ?? []), + typeof row["source_path"] === "string" ? 
row["source_path"] : "", + ...sessionIds.map((sessionId) => `/sessions/${sessionId}.json`), + ].map((value) => value.trim()).filter(Boolean); + for (let i = 0; i < sourcePaths.length; i++) { + const sourcePath = sourcePaths[i]; + const sessionId = sessionIds[i] || sessionIds[0] || sourcePath.match(/(conv_\d+_session_\d+)\.json$/)?.[1] || ""; + if (!sourcePath) continue; + const key = `${sessionId}@@${sourcePath}`; + if (seen.has(key)) continue; + seen.add(key); + expanded.push({ sessionId, sourcePath }); + if (expanded.length >= 12) return expanded; + } + } + return expanded; +} + +function prependCtes(sql: string, ctes: string[]): string { + if (ctes.length === 0) return sql; + if (/^with\b/i.test(sql)) { + return sql.replace(/^with\b/i, `WITH ${ctes.join(", ")},`); + } + return `WITH ${ctes.join(", ")} ${sql}`; +} + +function rewriteQueryWithRestrictedTables( + sql: string, + memoryTable: string, + sessionsTable: string, + restrictedMemoryAlias: string | null, + restrictedSessionsAlias: string | null, +): string { + let rewritten = sql; + if (restrictedMemoryAlias) { + const memoryPattern = escapeRegex(memoryTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + } + if (restrictedSessionsAlias) { + const sessionsPattern = escapeRegex(sessionsTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + } + return rewritten; +} + +async function applyGraphRestrictionsToPsqlQuery( + api: DeeplakeApi, + sql: string, + memoryTable: string, + sessionsTable: string, + graphNodesTable: string, + graphEdgesTable: string, +): Promise { + if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) 
|| ref === normalizeSqlRef(graphEdgesTable))) { + return sql; + } + const refs = extractSqlTableRefs(sql); + const touchesMemory = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); + const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); + if (!touchesMemory && !touchesSessions) return sql; + + const terms = extractSqlSearchTerms(sql); + if (terms.length === 0) return sql; + + const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + if (candidates.length === 0 || candidates.length > 16) return sql; + + const values = candidates.map((candidate) => + `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')` + ); + const ctes = [ + `__hm_graph_candidates(source_session_id, source_path) AS (VALUES ${values.join(", ")})`, + ]; + let restrictedMemoryAlias: string | null = null; + let restrictedSessionsAlias: string | null = null; + + if (touchesMemory) { + restrictedMemoryAlias = "__hm_memory"; + ctes.push( + `"${restrictedMemoryAlias}" AS (` + + ` SELECT * FROM "${memoryTable}" m` + + ` WHERE EXISTS (` + + ` SELECT 1 FROM __hm_graph_candidates gc` + + ` WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%')` + + ` OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%')` + + ` )` + + `)` + ); + } + if (touchesSessions) { + restrictedSessionsAlias = "__hm_sessions"; + ctes.push( + `"${restrictedSessionsAlias}" AS (` + + ` SELECT * FROM "${sessionsTable}" s` + + ` WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + `)` + ); + } + + return prependCtes( + rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), + ctes, + ); +} + function formatPsqlValue(value: unknown): string { if (value === null || value === undefined) return ""; if (typeof value === "string") return value; @@ -712,7 +1011,18 @@ export async function 
executeCompiledBashCommand( } if (segment.kind === "psql") { - const rows = await api.query(validatePsqlQuery(segment.query, memoryTable, sessionsTable)); + const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; + const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges"; + const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); + const prepared = await applyGraphRestrictionsToPsqlQuery( + api, + validated, + memoryTable, + sessionsTable, + graphNodesTable, + graphEdgesTable, + ); + const rows = await api.query(prepared); const formatted = formatPsqlRows(rows, segment.tuplesOnly, segment.fieldSeparator); const limited = segment.lineLimit > 0 ? formatted.split("\n").slice(0, segment.lineLimit).join("\n") : formatted; outputs.push(limited); diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 5dda0f7..b2b57f0 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -95,12 +95,12 @@ export function buildUnsupportedGuidance(): string { export function buildPsqlOnlyGuidance(): string { return "Hivemind recall is SQL-only in this mode. " + - "Use psql with the memory and sessions tables only. " + + "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. " + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } export function buildPsqlSchemaGuidance(): string { - return "Only psql SELECT queries over memory and sessions are intercepted in SQL mode. " + + return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. 
" + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; } diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts index 8645d98..8c720cc 100644 --- a/src/hooks/codex/session-start-setup.ts +++ b/src/hooks/codex/session-start-setup.ts @@ -177,6 +177,11 @@ export async function runCodexSessionStartSetup(input: CodexSessionStartInput, d } else { try { await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); const drain = await drainSessionQueuesFn(api, { sessionsTable: config.sessionsTableName, }); diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index 1cd875f..f72fcce 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -78,25 +78,41 @@ export const CODEX_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: Use S Available tables: - memory(path, summary, project, description, creation_date, last_update_date) - sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) Use this command shape: - psql -At -F '|' -c "SELECT ..." Workflow: -1. Query memory first to identify likely summaries. +1. 
Query memory first to identify likely summaries and sessions. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. -3. Re-query memory by exact path for the small set of summary rows you selected. -4. Query sessions by exact path for transcript evidence or unresolved dates. -5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. -6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. -7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -9. If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. -10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. Re-query memory by exact path for the small candidate set you selected. +5. Query sessions by exact path for transcript evidence or unresolved dates. +6. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. 
If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative, immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased summary labels. +12. When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". 
Good query patterns: - Candidate summaries: psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%%' AND (summary ILIKE '%%' OR summary ILIKE '%%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: @@ -105,11 +121,18 @@ Good query patterns: psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" - If literal ILIKE retrieval is sparse or semantically weak, retry with BM25 text ranking on summaries: psql -At -F '|' -c "SELECT path, summary, summary <#> ' ' AS score FROM memory WHERE summary ILIKE '%%' ORDER BY score DESC LIMIT 5" +- If graph entity lookup is sparse or semantically weak, retry with BM25 on graph nodes: + psql -At -F '|' -c "SELECT node_id, canonical_name, node_type, summary, source_session_id, source_path, search_text <#> ' ' AS score FROM graph_nodes ORDER BY score DESC LIMIT 5" Avoid these mistakes: - Do NOT search person names via path ILIKE. 
Person names live in summary text, not session paths. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. Answer rules: - Return the smallest exact answer supported by the data. @@ -117,11 +140,12 @@ Answer rules: - Do not answer "not found" until you have checked both memory and a likely sessions row. - Preserve direct relative-duration answers when they already match the question. - If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. - If a summary says something vague like "home country", search sessions for the exact named place before answering. - Aggregate across the small candidate set before answering profile or list questions. - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. -Only psql SELECT queries over memory and sessions are intercepted in this mode. 
Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; +Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; export interface CodexSessionStartInput { session_id: string; diff --git a/src/hooks/codex/spawn-wiki-worker.ts b/src/hooks/codex/spawn-wiki-worker.ts index 0db3f15..dbd3bd6 100644 --- a/src/hooks/codex/spawn-wiki-worker.ts +++ b/src/hooks/codex/spawn-wiki-worker.ts @@ -9,6 +9,8 @@ import { dirname, join } from "node:path"; import { writeFileSync, mkdirSync, appendFileSync } from "node:fs"; import { homedir, tmpdir } from "node:os"; import type { Config } from "../../config.js"; +import { GRAPH_PROMPT_TEMPLATE } from "../knowledge-graph.js"; +import { MEMORY_FACT_PROMPT_TEMPLATE } from "../memory-facts.js"; const HOME = homedir(); export const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); @@ -113,6 +115,11 @@ export function spawnCodexWikiWorker(opts: SpawnOptions): void { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -121,6 +128,8 @@ export function spawnCodexWikiWorker(opts: SpawnOptions): void { wikiLog: WIKI_LOG, hooksDir: join(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: 
MEMORY_FACT_PROMPT_TEMPLATE, })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); diff --git a/src/hooks/codex/wiki-worker.ts b/src/hooks/codex/wiki-worker.ts index cf93218..7f4fbda 100644 --- a/src/hooks/codex/wiki-worker.ts +++ b/src/hooks/codex/wiki-worker.ts @@ -12,6 +12,16 @@ import { execFileSync } from "node:child_process"; import { join } from "node:path"; import { finalizeSummary, releaseLock } from "../summary-state.js"; import { uploadSummary } from "../upload-summary.js"; +import { + buildKnowledgeGraphPrompt, + parseGraphExtraction, + replaceSessionGraph, +} from "../knowledge-graph.js"; +import { + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "../memory-facts.js"; interface WorkerConfig { apiUrl: string; @@ -20,6 +30,11 @@ interface WorkerConfig { workspaceId: string; memoryTable: string; sessionsTable: string; + graphNodesTable: string; + graphEdgesTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; sessionId: string; userName: string; project: string; @@ -28,6 +43,8 @@ interface WorkerConfig { wikiLog: string; hooksDir: string; promptTemplate: string; + graphPromptTemplate: string; + factPromptTemplate: string; } const cfg: WorkerConfig = JSON.parse(readFileSync(process.argv[2], "utf-8")); @@ -180,6 +197,75 @@ async function main(): Promise { }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate, + }); + const graphRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + graphPrompt, + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const graph = 
parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + graph, + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e: any) { + wlog(`graph update failed: ${e.message}`); + } + + try { + const factPrompt = buildMemoryFactPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate, + }); + const factsRaw = execFileSync(cfg.codexBin, [ + "exec", + "--dangerously-bypass-approvals-and-sandbox", + factPrompt, + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "codex", + sourcePath: jsonlServerPath, + extraction, + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e: any) { + wlog(`fact update failed: ${e.message}`); + } + try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/src/hooks/knowledge-graph.ts b/src/hooks/knowledge-graph.ts new file mode 100644 index 0000000..af9a3f5 --- /dev/null +++ b/src/hooks/knowledge-graph.ts @@ -0,0 +1,285 @@ +import { randomUUID } from "node:crypto"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; +import { esc, type QueryFn } from "./upload-summary.js"; + +export interface GraphNodeSpec { + name: string; 
+ type?: string; + summary?: string; + aliases?: string[]; +} + +export interface GraphEdgeSpec { + source: string; + target: string; + relation: string; + summary?: string; + evidence?: string; +} + +export interface GraphExtraction { + nodes: GraphNodeSpec[]; + edges: GraphEdgeSpec[]; +} + +export interface ReplaceSessionGraphParams { + query: QueryFn; + nodesTable: string; + edgesTable: string; + sessionId: string; + userName: string; + project: string; + agent: string; + sourcePath: string; + graph: GraphExtraction; + ts?: string; +} + +export interface ReplaceSessionGraphResult { + nodes: number; + edges: number; +} + +export const GRAPH_PROMPT_TEMPLATE = `You are extracting a compact knowledge graph delta from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"nodes":[{"name":"canonical entity name","type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","summary":"short factual description","aliases":["optional alias"]}],"edges":[{"source":"canonical source entity","target":"canonical target entity","relation":"snake_case_relation","summary":"short factual relation summary","evidence":"short supporting phrase"}]} + +Rules: +- Use canonical names for repeated entities. +- Include people, places, organizations, books/media, tools, files, goals, status labels, preferences, and notable events when they matter for future recall. +- Convert relationship/status/origin/preferences into edges when possible. Example relation shapes: home_country, relationship_status, enjoys, decided_to_pursue, works_on, uses_tool, located_in, recommended, plans, supports. +- Keep summaries short and factual. Do not invent facts beyond the summary. +- If a source or target appears in an edge but not in nodes, also include it in nodes. +- Prefer stable canonical names over pronouns. 
+- Return no markdown, no prose, no code fences, only JSON.`; + +function stripCodeFences(text: string): string { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} + +function normalizeString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function normalizeAliasList(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .map(normalizeString) + .filter(Boolean) + .filter((item, index, arr) => arr.indexOf(item) === index); +} + +export function parseGraphExtraction(raw: string): GraphExtraction { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned) as Record; + const nodes = Array.isArray(parsed["nodes"]) ? parsed["nodes"] as Array> : []; + const edges = Array.isArray(parsed["edges"]) ? parsed["edges"] as Array> : []; + return { + nodes: nodes + .map((node) => ({ + name: normalizeString(node["name"]), + type: normalizeString(node["type"]) || "other", + summary: normalizeString(node["summary"]), + aliases: normalizeAliasList(node["aliases"]), + })) + .filter((node) => node.name), + edges: edges + .map((edge) => ({ + source: normalizeString(edge["source"]), + target: normalizeString(edge["target"]), + relation: normalizeString(edge["relation"]).replace(/\s+/g, "_").toLowerCase(), + summary: normalizeString(edge["summary"]), + evidence: normalizeString(edge["evidence"]), + })) + .filter((edge) => edge.source && edge.target && edge.relation), + }; +} + +function slugify(value: string): string { + return value + .normalize("NFKD") + .replace(/[^\w\s-]/g, "") + .trim() + .toLowerCase() + .replace(/[\s-]+/g, "_") + .replace(/^_+|_+$/g, "") || "item"; +} + +export function buildGraphNodeId(name: string, _type = "other"): string { + return `entity:${slugify(name)}`; +} + +function buildNodeSearchText(node: GraphNodeSpec): string { + return [ + node.name, + node.type ?? 
"other", + ...(node.aliases ?? []), + node.summary ?? "", + ].filter(Boolean).join(" | "); +} + +function buildEdgeSearchText(edge: GraphEdgeSpec, sourceNodeId: string, targetNodeId: string): string { + return [ + edge.source, + edge.relation, + edge.target, + edge.summary ?? "", + edge.evidence ?? "", + sourceNodeId, + targetNodeId, + ].filter(Boolean).join(" | "); +} + +export function buildKnowledgeGraphPrompt(args: { + summaryText: string; + sessionId: string; + sourcePath: string; + project: string; + template?: string; +}): string { + return (args.template ?? GRAPH_PROMPT_TEMPLATE) + .replace(/__SUMMARY_TEXT__/g, args.summaryText) + .replace(/__SESSION_ID__/g, args.sessionId) + .replace(/__SOURCE_PATH__/g, args.sourcePath) + .replace(/__PROJECT__/g, args.project); +} + +function wrapGraphPhaseError(error: unknown, args: { + phase: "delete_nodes" | "delete_edges" | "insert_nodes" | "insert_edges"; + sessionId: string; + table: string; + sql: string; +}): Error { + const wrapped = new Error( + `graph ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${ + error instanceof Error ? error.message : String(error) + }` + ); + (wrapped as Error & Record).cause = error; + (wrapped as Error & Record).phase = args.phase; + (wrapped as Error & Record).sessionId = args.sessionId; + (wrapped as Error & Record).table = args.table; + (wrapped as Error & Record).sql = args.sql; + return wrapped; +} + +export async function replaceSessionGraph(params: ReplaceSessionGraphParams): Promise { + const ts = params.ts ?? 
/**
 * Replace all graph rows for one session: delete the session's previous node
 * and edge rows, then insert the freshly extracted graph.
 *
 * NOTE(review): the DELETE/INSERT phases run as separate queries with no
 * transaction visible here — a failure mid-way leaves the session's graph
 * partially replaced; each failure is rethrown wrapped with phase context.
 */
export async function replaceSessionGraph(params: ReplaceSessionGraphParams): Promise<ReplaceSessionGraphResult> {
  const ts = params.ts ?? new Date().toISOString();
  // Virtual paths under which this session's graph rows are filed.
  const nodePath = `/graphs/nodes/${params.userName}/${params.sessionId}.jsonl`;
  const edgePath = `/graphs/edges/${params.userName}/${params.sessionId}.jsonl`;
  const nodeFilename = `${params.sessionId}.jsonl`;
  const edgeFilename = `${params.sessionId}.jsonl`;

  // Deduplicate nodes by derived node id; an explicit node spec overwrites
  // any earlier entry with the same id.
  const nodeMap = new Map();
  for (const node of params.graph.nodes) {
    const key = buildGraphNodeId(node.name, node.type);
    nodeMap.set(key, {
      name: node.name,
      type: node.type || "other",
      summary: node.summary || "",
      aliases: node.aliases || [],
    });
  }
  // Ensure every edge endpoint also exists as a (possibly stub) node.
  for (const edge of params.graph.edges) {
    const sourceKey = buildGraphNodeId(edge.source);
    const targetKey = buildGraphNodeId(edge.target);
    if (!nodeMap.has(sourceKey)) nodeMap.set(sourceKey, { name: edge.source, type: "other", summary: "", aliases: [] });
    if (!nodeMap.has(targetKey)) nodeMap.set(targetKey, { name: edge.target, type: "other", summary: "", aliases: [] });
  }

  // Phase 1: clear the session's previous graph rows (nodes, then edges).
  const deleteNodesSql = `DELETE FROM "${params.nodesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`;
  const deleteEdgesSql = `DELETE FROM "${params.edgesTable}" WHERE source_session_id = '${esc(params.sessionId)}'`;
  try {
    await params.query(deleteNodesSql);
  } catch (error) {
    throw wrapGraphPhaseError(error, {
      phase: "delete_nodes",
      sessionId: params.sessionId,
      table: params.nodesTable,
      sql: deleteNodesSql,
    });
  }
  try {
    await params.query(deleteEdgesSql);
  } catch (error) {
    throw wrapGraphPhaseError(error, {
      phase: "delete_edges",
      sessionId: params.sessionId,
      table: params.edgesTable,
      sql: deleteEdgesSql,
    });
  }

  // Phase 2: build node VALUES tuples. All string fields go through esc();
  // summary falls back to a blurb built from the node name.
  const nodeRows = [...nodeMap.entries()].map(([nodeId, node]) => {
    const summary = node.summary || buildSummaryBlurb(`# Graph Node\n\n${node.name}`);
    const aliases = (node.aliases ?? []).join(", ");
    const searchText = buildNodeSearchText(node);
    return (
      `('${randomUUID()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(nodeId)}', ` +
      `'${esc(node.name)}', '${esc(node.type || "other")}', E'${esc(summary)}', E'${esc(searchText)}', ` +
      `'${esc(aliases)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', ` +
      `'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` +
      `E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`
    );
  });

  if (nodeRows.length > 0) {
    const insertNodesSql = `INSERT INTO "${params.nodesTable}" ` +
      `(id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` +
      `VALUES ${nodeRows.join(", ")}`;
    try {
      await params.query(insertNodesSql);
    } catch (error) {
      throw wrapGraphPhaseError(error, {
        phase: "insert_nodes",
        sessionId: params.sessionId,
        table: params.nodesTable,
        sql: insertNodesSql,
      });
    }
  }

  // Phase 3: build edge VALUES tuples; edge_id encodes source:relation:target.
  const edgeRows = params.graph.edges.map((edge) => {
    const sourceNodeId = buildGraphNodeId(edge.source);
    const targetNodeId = buildGraphNodeId(edge.target);
    const searchText = buildEdgeSearchText(edge, sourceNodeId, targetNodeId);
    const summary = edge.summary || `${edge.source} ${edge.relation} ${edge.target}`;
    const evidence = edge.evidence || "";
    const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`;
    return (
      `('${randomUUID()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edgeId)}', ` +
      `'${esc(sourceNodeId)}', '${esc(targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', ` +
      `E'${esc(evidence)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` +
      `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` +
      `E'${esc(buildSummaryBlurb(summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`
    );
  });

  if (edgeRows.length > 0) {
    const insertEdgesSql = `INSERT INTO "${params.edgesTable}" ` +
      `(id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` +
      `VALUES ${edgeRows.join(", ")}`;
    try {
      await params.query(insertEdgesSql);
    } catch (error) {
      throw wrapGraphPhaseError(error, {
        phase: "insert_edges",
        sessionId: params.sessionId,
        table: params.edgesTable,
        sql: insertEdgesSql,
      });
    }
  }

  // Counts of rows written this pass (not cumulative totals).
  return { nodes: nodeRows.length, edges: edgeRows.length };
}
MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. + +SESSION ID: __SESSION_ID__ +SOURCE PATH: __SOURCE_PATH__ +PROJECT: __PROJECT__ + +SUMMARY MARKDOWN: +__SUMMARY_TEXT__ + +Return ONLY valid JSON with this exact shape: +{"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} + +Rules: +- Extract atomic facts that are useful for later recall. One durable claim per fact. +- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. +- Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. +- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. +- Do not invent facts that are not supported by the summary. +- Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. 
+- Return no markdown, no prose, no code fences, only JSON.`; + +interface EntityAggregate { + entityId: string; + canonicalName: string; + entityType: string; + aliases: Set; + summaries: Set; + searchTerms: Set; +} + +interface FactRowSpec { + factId: string; + subjectEntityId: string; + subjectName: string; + subjectType: string; + objectEntityId: string; + objectName: string; + objectType: string; + predicate: string; + summary: string; + evidence: string; + searchText: string; + confidence: string; + validAt: string; + validFrom: string; + validTo: string; +} + +function stripCodeFences(text: string): string { + const trimmed = text.trim(); + const fenceMatch = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fenceMatch ? fenceMatch[1].trim() : trimmed; +} + +function normalizeString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function normalizeAliases(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .map(normalizeString) + .filter(Boolean) + .filter((item, index, arr) => arr.indexOf(item) === index); +} + +function normalizeFactType(value: unknown): string { + return normalizeString(value) || "other"; +} + +function normalizeConfidence(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value)) { + return Math.max(0, Math.min(1, value)); + } + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return Math.max(0, Math.min(1, parsed)); + } + return undefined; +} + +function slugify(value: string): string { + return value + .normalize("NFKD") + .replace(/[^\w\s-]/g, "") + .trim() + .toLowerCase() + .replace(/[\s-]+/g, "_") + .replace(/^_+|_+$/g, "") || "item"; +} + +function buildFactId(sessionId: string, fact: MemoryFactSpec, index: number): string { + return [ + "fact", + slugify(sessionId), + String(index + 1), + slugify(fact.subject), + slugify(fact.predicate), + 
slugify(fact.object), + ].join(":"); +} + +function buildFactSearchText(fact: MemoryFactSpec): string { + return [ + fact.subject, + ...(fact.subjectAliases ?? []), + fact.predicate, + fact.object, + ...(fact.objectAliases ?? []), + fact.summary ?? "", + fact.evidence ?? "", + fact.validAt ?? "", + fact.validFrom ?? "", + fact.validTo ?? "", + ].filter(Boolean).join(" | "); +} + +function buildEntitySearchText(entity: EntityAggregate): string { + return [ + entity.canonicalName, + entity.entityType, + ...entity.aliases, + ...entity.searchTerms, + ...entity.summaries, + ].filter(Boolean).join(" | "); +} + +function mergeDelimited(existing: string, nextValues: Iterable<string>): string { + const merged = new Set( + existing.split(",").map((value) => value.trim()).filter(Boolean), + ); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) continue; + merged.add(trimmed); + } + return [...merged].join(", "); +} + +function mergePipeDelimited(existing: string, nextValues: Iterable<string>, maxItems = 8): string { + const merged = new Set( + existing.split("|").map((value) => value.trim()).filter(Boolean), + ); + for (const value of nextValues) { + const trimmed = value.trim(); + if (!trimmed) continue; + if (merged.has(trimmed)) continue; + if (merged.size >= maxItems) break; + merged.add(trimmed); + } + return [...merged].join(" | "); +} + +function wrapFactsPhaseError(error: unknown, args: { + phase: "delete_facts" | "delete_links" | "upsert_entities" | "insert_facts" | "insert_links"; + sessionId: string; + table: string; + sql: string; +}): Error { + const wrapped = new Error( + `facts ${args.phase} failed for session ${args.sessionId} on table ${args.table}: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + (wrapped as Error & Record<string, unknown>).cause = error; + (wrapped as Error & Record<string, unknown>).phase = args.phase; + (wrapped as Error & Record<string, unknown>).sessionId = args.sessionId; + (wrapped as Error & Record<string, unknown>).table = args.table; + (wrapped as Error & Record<string, unknown>).sql = args.sql; + return wrapped; +} + +function buildEntityAggregate( + entityMap: Map<string, EntityAggregate>, + args: { name: string; type: string; aliases: string[]; summary: string; searchText: string }, +): EntityAggregate { + const entityId = buildGraphNodeId(args.name, args.type); + const existing = entityMap.get(entityId); + if (existing) { + for (const alias of args.aliases) existing.aliases.add(alias); + if (args.summary) existing.summaries.add(args.summary); + if (args.searchText) existing.searchTerms.add(args.searchText); + return existing; + } + const created: EntityAggregate = { + entityId, + canonicalName: args.name, + entityType: args.type || "other", + aliases: new Set(args.aliases), + summaries: new Set(args.summary ? [args.summary] : []), + searchTerms: new Set(args.searchText ? 
[args.searchText] : []), + }; + entityMap.set(entityId, created); + return created; +} + +async function upsertEntities(params: { + query: QueryFn; + entitiesTable: string; + entityMap: Map; + userName: string; + project: string; + agent: string; + sourcePath: string; + sessionId: string; + ts: string; +}): Promise { + let upserts = 0; + const path = `/facts/entities/${params.userName}.jsonl`; + const filename = `${params.userName}.jsonl`; + + for (const entity of params.entityMap.values()) { + const aliases = [...entity.aliases].filter((alias) => alias !== entity.canonicalName); + const entitySummary = [...entity.summaries].join(" | ") || entity.canonicalName; + const searchText = buildEntitySearchText(entity); + const existingRows = await params.query( + `SELECT id, aliases, summary, search_text, source_session_ids, source_paths, entity_type FROM "${params.entitiesTable}" ` + + `WHERE entity_id = '${esc(entity.entityId)}' LIMIT 1`, + ); + if (existingRows.length === 0) { + const insertSql = + `INSERT INTO "${params.entitiesTable}" ` + + `(id, path, filename, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ` + + `('${randomUUID()}', '${esc(path)}', '${esc(filename)}', '${esc(entity.entityId)}', '${esc(entity.canonicalName)}', '${esc(entity.entityType)}', ` + + `'${esc(aliases.join(", "))}', E'${esc(entitySummary)}', E'${esc(searchText)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` + + `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(entitySummary))}', '${esc(params.agent)}', '${params.ts}', '${params.ts}')`; + await params.query(insertSql); + upserts += 1; + continue; + } + + const existing = existingRows[0]; + const mergedAliases = mergeDelimited(String(existing["aliases"] ?? 
""), aliases); + const mergedSummary = mergePipeDelimited(String(existing["summary"] ?? ""), entity.summaries, 10) || entitySummary; + const mergedSearchText = mergePipeDelimited(String(existing["search_text"] ?? ""), [searchText], 12) || searchText; + const mergedSessionIds = mergeDelimited(String(existing["source_session_ids"] ?? ""), [params.sessionId]); + const mergedSourcePaths = mergeDelimited(String(existing["source_paths"] ?? ""), [params.sourcePath]); + const existingType = normalizeString(existing["entity_type"]); + const entityType = existingType && existingType !== "other" ? existingType : entity.entityType; + const updateSql = + `UPDATE "${params.entitiesTable}" SET ` + + `canonical_name = '${esc(entity.canonicalName)}', entity_type = '${esc(entityType)}', aliases = '${esc(mergedAliases)}', ` + + `summary = E'${esc(mergedSummary)}', search_text = E'${esc(mergedSearchText)}', ` + + `source_session_ids = '${esc(mergedSessionIds)}', source_paths = '${esc(mergedSourcePaths)}', ` + + `size_bytes = ${Buffer.byteLength(mergedSearchText, "utf-8")}, project = '${esc(params.project)}', ` + + `description = E'${esc(buildSummaryBlurb(mergedSummary))}', agent = '${esc(params.agent)}', last_update_date = '${params.ts}' ` + + `WHERE entity_id = '${esc(entity.entityId)}'`; + await params.query(updateSql); + upserts += 1; + } + return upserts; +} + +export function parseMemoryFactExtraction(raw: string): MemoryFactExtraction { + const cleaned = stripCodeFences(raw); + const parsed = JSON.parse(cleaned) as Record; + const facts = Array.isArray(parsed["facts"]) ? 
parsed["facts"] as Array<Record<string, unknown>> : []; + const dedupe = new Set<string>(); + return { + facts: facts + .map((fact) => ({ + subject: normalizeString(fact["subject"]), + subjectType: normalizeFactType(fact["subject_type"]), + subjectAliases: normalizeAliases(fact["subject_aliases"]), + predicate: normalizeString(fact["predicate"]).replace(/\s+/g, "_").toLowerCase(), + object: normalizeString(fact["object"]), + objectType: normalizeFactType(fact["object_type"]), + objectAliases: normalizeAliases(fact["object_aliases"]), + summary: normalizeString(fact["summary"]), + evidence: normalizeString(fact["evidence"]), + confidence: normalizeConfidence(fact["confidence"]), + validAt: normalizeString(fact["valid_at"]), + validFrom: normalizeString(fact["valid_from"]), + validTo: normalizeString(fact["valid_to"]), + })) + .filter((fact) => fact.subject && fact.predicate && fact.object) + .filter((fact) => { + const key = `${fact.subject}::${fact.predicate}::${fact.object}`; + if (dedupe.has(key)) return false; + dedupe.add(key); + return true; + }), + }; +} + +export function buildMemoryFactPrompt(args: { + summaryText: string; + sessionId: string; + sourcePath: string; + project: string; + template?: string; +}): string { + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE) + .replace(/__SUMMARY_TEXT__/g, args.summaryText) + .replace(/__SESSION_ID__/g, args.sessionId) + .replace(/__SOURCE_PATH__/g, args.sourcePath) + .replace(/__PROJECT__/g, args.project); +} + +export async function replaceSessionFacts(params: ReplaceSessionFactsParams): Promise<{ facts: number; entities: number; links: number }> { + const ts = params.ts ?? 
new Date().toISOString(); + const factPath = `/facts/${params.userName}/${params.sessionId}.jsonl`; + const linkPath = `/facts/links/${params.userName}/${params.sessionId}.jsonl`; + const factFilename = `${params.sessionId}.jsonl`; + const linkFilename = `${params.sessionId}.jsonl`; + + const deleteFactsSql = `DELETE FROM "${params.factsTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + const deleteLinksSql = `DELETE FROM "${params.linksTable}" WHERE source_session_id = '${esc(params.sessionId)}'`; + try { + await params.query(deleteFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: deleteFactsSql, + }); + } + try { + await params.query(deleteLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "delete_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: deleteLinksSql, + }); + } + + const entityMap = new Map(); + const factRows: FactRowSpec[] = params.extraction.facts.map((fact, index) => { + const summary = fact.summary || `${fact.subject} ${fact.predicate.replace(/_/g, " ")} ${fact.object}`; + const searchText = buildFactSearchText(fact); + const subjectEntity = buildEntityAggregate(entityMap, { + name: fact.subject, + type: fact.subjectType || "other", + aliases: fact.subjectAliases ?? [], + summary, + searchText, + }); + const objectEntity = buildEntityAggregate(entityMap, { + name: fact.object, + type: fact.objectType || "other", + aliases: fact.objectAliases ?? 
[], + summary, + searchText, + }); + return { + factId: buildFactId(params.sessionId, fact, index), + subjectEntityId: subjectEntity.entityId, + subjectName: fact.subject, + subjectType: fact.subjectType || "other", + objectEntityId: objectEntity.entityId, + objectName: fact.object, + objectType: fact.objectType || "other", + predicate: fact.predicate, + summary, + evidence: fact.evidence || "", + searchText, + confidence: fact.confidence == null ? "" : String(fact.confidence), + validAt: fact.validAt || "", + validFrom: fact.validFrom || "", + validTo: fact.validTo || "", + }; + }); + + try { + await upsertEntities({ + query: params.query, + entitiesTable: params.entitiesTable, + entityMap, + userName: params.userName, + project: params.project, + agent: params.agent, + sourcePath: params.sourcePath, + sessionId: params.sessionId, + ts, + }); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "upsert_entities", + sessionId: params.sessionId, + table: params.entitiesTable, + sql: `UPSERT entities for ${params.sessionId}`, + }); + } + + if (factRows.length > 0) { + const values = factRows.map((row) => + `('${randomUUID()}', '${esc(factPath)}', '${esc(factFilename)}', '${esc(row.factId)}', ` + + `'${esc(row.subjectEntityId)}', '${esc(row.subjectName)}', '${esc(row.subjectType)}', '${esc(row.predicate)}', ` + + `'${esc(row.objectEntityId)}', '${esc(row.objectName)}', '${esc(row.objectType)}', E'${esc(row.summary)}', ` + + `E'${esc(row.evidence)}', E'${esc(row.searchText)}', '${esc(row.confidence)}', '${esc(row.validAt)}', ` + + `'${esc(row.validFrom)}', '${esc(row.validTo)}', '${esc(params.sessionId)}', '${esc(params.sourcePath)}', ` + + `'${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.searchText, "utf-8")}, '${esc(params.project)}', ` + + `E'${esc(buildSummaryBlurb(row.summary))}', '${esc(params.agent)}', '${ts}', '${ts}')`, + ); + const insertFactsSql = + `INSERT INTO "${params.factsTable}" ` + + `(id, path, filename, fact_id, 
subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${values.join(", ")}`; + try { + await params.query(insertFactsSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_facts", + sessionId: params.sessionId, + table: params.factsTable, + sql: insertFactsSql, + }); + } + } + + const linkRows = factRows.flatMap((row) => ([ + { + linkId: `${row.factId}:subject:${row.subjectEntityId}`, + factId: row.factId, + entityId: row.subjectEntityId, + entityRole: "subject", + }, + { + linkId: `${row.factId}:object:${row.objectEntityId}`, + factId: row.factId, + entityId: row.objectEntityId, + entityRole: "object", + }, + ])); + + if (linkRows.length > 0) { + const values = linkRows.map((row) => + `('${randomUUID()}', '${esc(linkPath)}', '${esc(linkFilename)}', '${esc(row.linkId)}', ` + + `'${esc(row.factId)}', '${esc(row.entityId)}', '${esc(row.entityRole)}', ` + + `'${esc(params.sessionId)}', '${esc(params.sourcePath)}', '${esc(params.userName)}', 'application/json', ${Buffer.byteLength(row.linkId, "utf-8")}, ` + + `'${esc(params.project)}', 'fact entity link', '${esc(params.agent)}', '${ts}', '${ts}')`, + ); + const insertLinksSql = + `INSERT INTO "${params.linksTable}" ` + + `(id, path, filename, link_id, fact_id, entity_id, entity_role, source_session_id, source_path, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${values.join(", ")}`; + try { + await params.query(insertLinksSql); + } catch (error) { + throw wrapFactsPhaseError(error, { + phase: "insert_links", + sessionId: params.sessionId, + table: params.linksTable, + sql: insertLinksSql, + }); + } + } + + return { + facts: factRows.length, + entities: entityMap.size, + links: 
linkRows.length, + }; +} diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 961a8dd..8288c3e 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -50,12 +50,12 @@ function needsHivemindPsqlRewrite(cmd: string): boolean { function buildPsqlOnlyGuidance(): string { return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + - "Use psql with the memory and sessions tables only. " + + "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. " + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance(): string { - return "[RETRY REQUIRED] Only psql SELECT queries over memory and sessions are intercepted in SQL mode. " + + return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; } diff --git a/src/hooks/session-start-setup.ts b/src/hooks/session-start-setup.ts index a1cb722..bd20f82 100644 --- a/src/hooks/session-start-setup.ts +++ b/src/hooks/session-start-setup.ts @@ -173,6 +173,11 @@ export async function runSessionStartSetup(input: SessionStartInput, deps: Sessi } else { try { await api.ensureSessionsTable(config.sessionsTableName); + await api.ensureGraphNodesTable(config.graphNodesTableName); + await api.ensureGraphEdgesTable(config.graphEdgesTableName); + await api.ensureFactsTable(config.factsTableName); + await api.ensureEntitiesTable(config.entitiesTableName); + await api.ensureFactEntityLinksTable(config.factEntityLinksTableName); const drain = await drainSessionQueuesFn(api, { sessionsTable: config.sessionsTableName, }); diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index 3b723e1..752d059 100644 --- a/src/hooks/session-start.ts +++ 
b/src/hooks/session-start.ts @@ -152,25 +152,41 @@ export const CLAUDE_SESSION_START_CONTEXT_PSQL = `DEEPLAKE MEMORY SQL MODE: For Available Deeplake tables: - memory(path, summary, project, description, creation_date, last_update_date) - sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) Use this command shape: - psql -At -F '|' -c "SELECT ..." SQL strategy: -1. Start with targeted SELECTs against memory to find likely summaries. +1. Start with targeted SELECTs against memory to find likely sessions or summaries. 2. In the first pass, combine the named person/entity term with one or more topic terms. Prefer narrow AND filters over broad OR filters. -3. After finding candidate summary rows, re-query memory by exact path to inspect only those summaries. -4. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. -5. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. -6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. -7. If the first summary query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory before concluding the data is absent. -8. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -9. 
If a summary answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. -10. For identity, origin, relationship, and "what did they decide" questions, prefer the exact self-description or named entity from sessions over a paraphrased summary label. +3. Graph-backed entity and relation resolution is applied automatically behind the scenes to narrow likely sessions before memory/sessions queries run. You do not need to query graph tables manually for normal recall. +3a. For stable person/project/place facts, use memory_facts first. Use memory_entities to resolve aliases or canonical names, then join through fact_entity_links when you need all facts connected to an entity. +4. After finding candidate summary rows, re-query memory by exact path. +5. If the answer needs exact wording, exact dates, or transcript grounding, query sessions by exact path for those candidate sessions. +6. Prefer precise WHERE filters, ORDER BY creation_date/last_update_date, and LIMIT 5-10. +7. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +8. If the first literal query returns 0-3 weak rows or the answer still seems semantically off, retry with BM25 ranking on memory.summary before concluding the data is absent. +9. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +10. If a summary, node, or edge answer is vague or relative (for example "home country", "next month", "last week"), immediately open the linked sessions rows and convert it to the most concrete answer supported there. +11. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over a paraphrased summary label. +12. 
When memory_entities resolves a canonical entity, use fact_entity_links to expand the connected facts before deciding the fact layer is sparse. +13. For identity or relationship questions, prefer the narrowest explicit self-label or status label over broader biography or community descriptions. +14. For "when" questions, if the best evidence is already phrased relative to another dated event, return that relative phrase instead of inventing a different absolute date. +15. For list/profile questions, return a minimal comma-separated set of directly supported items. Do not pad the answer with adjacent hobbies, events, or explanations. +16. For artifact/title questions such as books, talks, projects, or artworks, prefer exact titled objects from facts or transcript over generic phrases like "a book" or "a speech". Good query patterns: - Candidate summaries: psql -At -F '|' -c "SELECT path, summary, creation_date FROM memory WHERE summary ILIKE '%<person>%' AND (summary ILIKE '%<topic1>%' OR summary ILIKE '%<topic2>%') ORDER BY creation_date DESC LIMIT 5" +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%<name>%' OR aliases ILIKE '%<name>%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id FROM memory_facts WHERE subject_name ILIKE '%<entity>%' AND (predicate ILIKE '%<predicate>%' OR object_name ILIKE '%<topic>%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '<entity_id>' ORDER BY f.creation_date DESC LIMIT 10" - Exact summary reread: psql -At -F '|' -c "SELECT path, summary FROM memory WHERE path IN ('/summaries/...', '/summaries/...')" - Transcript grounding by exact path: @@ -183,7 +199,12 @@ Good 
query patterns: Avoid these mistakes: - Do NOT search person names via path ILIKE. Person names live in summary text, not session paths. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT stop at graph rows alone when the question asks for exact wording or time grounding. Use graph rows to narrow the search, then open the linked sessions. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. +- Do NOT turn a short list question into a narrative list of loosely related activities. Answer rules: - Return the smallest exact answer supported by the data. @@ -191,11 +212,12 @@ Answer rules: - Do not answer "not found" until you have checked both memory and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. - If the transcript already directly answers with a relative duration like "10 years ago", return that phrase instead of recalculating to today's date. +- If the transcript or fact row says something like "the week before June 9, 2023", return that phrase instead of converting it to June 9, 2023. - If a summary says something vague like "home country", search sessions for the exact named place before answering. - For list or profile questions, aggregate across the small set of candidate sessions before answering. 
- For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. -IMPORTANT: Only psql SELECT queries over memory and sessions are intercepted in this mode. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. +IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; diff --git a/src/hooks/spawn-wiki-worker.ts b/src/hooks/spawn-wiki-worker.ts index 2aed495..5fa611f 100644 --- a/src/hooks/spawn-wiki-worker.ts +++ b/src/hooks/spawn-wiki-worker.ts @@ -9,6 +9,8 @@ import { dirname, join } from "node:path"; import { writeFileSync, mkdirSync, appendFileSync } from "node:fs"; import { homedir, tmpdir } from "node:os"; import type { Config } from "../config.js"; +import { GRAPH_PROMPT_TEMPLATE } from "./knowledge-graph.js"; +import { MEMORY_FACT_PROMPT_TEMPLATE } from "./memory-facts.js"; import { utcTimestamp } from "../utils/debug.js"; const HOME = homedir(); @@ -117,6 +119,11 @@ export function spawnWikiWorker(opts: SpawnOptions): void { workspaceId: config.workspaceId, memoryTable: config.tableName, sessionsTable: config.sessionsTableName, + graphNodesTable: config.graphNodesTableName, + graphEdgesTable: config.graphEdgesTableName, + factsTable: config.factsTableName, + entitiesTable: config.entitiesTableName, + factEntityLinksTable: config.factEntityLinksTableName, sessionId, userName: config.userName, project: projectName, @@ -125,6 +132,8 @@ export function 
spawnWikiWorker(opts: SpawnOptions): void { wikiLog: WIKI_LOG, hooksDir: join(HOME, ".claude", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE, + graphPromptTemplate: GRAPH_PROMPT_TEMPLATE, + factPromptTemplate: MEMORY_FACT_PROMPT_TEMPLATE, })); wikiLog(`${reason}: spawning summary worker for ${sessionId}`); diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index ddc8ec2..06f3fae 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -13,6 +13,16 @@ import { join } from "node:path"; import { utcTimestamp } from "../utils/debug.js"; import { finalizeSummary, releaseLock } from "./summary-state.js"; import { uploadSummary } from "./upload-summary.js"; +import { + buildKnowledgeGraphPrompt, + parseGraphExtraction, + replaceSessionGraph, +} from "./knowledge-graph.js"; +import { + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "./memory-facts.js"; interface WorkerConfig { apiUrl: string; @@ -21,6 +31,11 @@ interface WorkerConfig { workspaceId: string; memoryTable: string; sessionsTable: string; + graphNodesTable: string; + graphEdgesTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; sessionId: string; userName: string; project: string; @@ -29,6 +44,8 @@ interface WorkerConfig { wikiLog: string; hooksDir: string; promptTemplate: string; + graphPromptTemplate: string; + factPromptTemplate: string; } const cfg: WorkerConfig = JSON.parse(readFileSync(process.argv[2], "utf-8")); @@ -186,6 +203,77 @@ async function main(): Promise { }); wlog(`uploaded ${vpath} (summary=${result.summaryLength}, desc=${result.descLength})`); + try { + const graphPrompt = buildKnowledgeGraphPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.graphPromptTemplate, + }); + const graphRaw = execFileSync(cfg.claudeBin, [ + "-p", graphPrompt, + "--no-session-persistence", + "--model", "haiku", + "--permission-mode", 
"bypassPermissions", + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const graph = parseGraphExtraction(graphRaw); + const graphResult = await replaceSessionGraph({ + query, + nodesTable: cfg.graphNodesTable, + edgesTable: cfg.graphEdgesTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + graph, + }); + wlog(`graph updated nodes=${graphResult.nodes} edges=${graphResult.edges}`); + } catch (e: any) { + wlog(`graph update failed: ${e.message}`); + } + + try { + const factPrompt = buildMemoryFactPrompt({ + summaryText: text, + sessionId: cfg.sessionId, + sourcePath: jsonlServerPath, + project: cfg.project, + template: cfg.factPromptTemplate, + }); + const factsRaw = execFileSync(cfg.claudeBin, [ + "-p", factPrompt, + "--no-session-persistence", + "--model", "haiku", + "--permission-mode", "bypassPermissions", + ], { + stdio: ["ignore", "pipe", "pipe"], + timeout: 120_000, + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }).toString("utf-8"); + const extraction = parseMemoryFactExtraction(factsRaw); + const factResult = await replaceSessionFacts({ + query, + factsTable: cfg.factsTable, + entitiesTable: cfg.entitiesTable, + linksTable: cfg.factEntityLinksTable, + sessionId: cfg.sessionId, + userName: cfg.userName, + project: cfg.project, + agent: "claude_code", + sourcePath: jsonlServerPath, + extraction, + }); + wlog(`facts updated facts=${factResult.facts} entities=${factResult.entities} links=${factResult.links}`); + } catch (e: any) { + wlog(`fact update failed: ${e.message}`); + } + try { finalizeSummary(cfg.sessionId, jsonlLines); wlog(`sidecar updated: lastSummaryCount=${jsonlLines}`); diff --git a/src/tools/backfill-harrier-embeddings.ts b/src/tools/backfill-harrier-embeddings.ts new file mode 100644 index 
0000000..59b253a --- /dev/null +++ b/src/tools/backfill-harrier-embeddings.ts @@ -0,0 +1,433 @@ +#!/usr/bin/env node + +import { loadConfig } from "../config.js"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { + buildMemoryEmbeddingText, + buildSessionEmbeddingText, + stableEmbeddingSourceHash, + type MemoryEmbeddingRow, + type SessionEmbeddingRow, +} from "../embeddings/text.js"; +import { sqlIdent, sqlStr } from "../utils/sql.js"; + +type TableKind = "memory" | "sessions"; + +interface Args { + table: TableKind | "all"; + memoryTable: string; + sessionsTable: string; + modelId: string; + startOffset: number; + maxRows?: number; + device?: string; + dtype?: string; + batchSize: number; + scanBatchSize: number; + limit?: number; + force: boolean; + localFilesOnly: boolean; + localModelPath?: string; + cacheDir?: string; + memoryMaxChars: number; + sessionsMaxChars: number; + embeddingColumn: string; + embeddingModelColumn: string; + embeddingSourceHashColumn: string; + embeddingUpdatedAtColumn: string; +} + +interface SqlColumnSpec { + name: string; + ddl: string; +} + +const DEFAULT_MODEL_ID = process.env.HIVEMIND_HARRIER_MODEL_ID + ?? process.env.DEEPLAKE_HARRIER_MODEL_ID + ?? 
"onnx-community/harrier-oss-v1-0.6b-ONNX"; +const DEFAULT_EMBEDDING_COLUMN = "embedding"; +const DEFAULT_BATCH_SIZE = 8; +const DEFAULT_SCAN_BATCH_SIZE = 64; + +function printUsage(): void { + process.stderr.write([ + "Usage: tsx src/tools/backfill-harrier-embeddings.ts [options]", + "", + "Options:", + " --table Tables to backfill (default: all)", + " --memory-table Memory table name", + " --sessions-table Sessions table name", + " --model-id Harrier model id (default: onnx-community/harrier-oss-v1-0.6b-ONNX)", + " --start-offset Start scanning at SQL offset n (default: 0)", + " --max-rows Process at most n scanned rows from the start offset", + " --device Transformers.js device (default: cpu)", + " --dtype Optional ONNX dtype override", + " --batch-size Embedding batch size (default: 8)", + " --scan-batch-size Rows read/write per scan batch (default: 64)", + " --limit Stop after n row updates", + " --force Recompute even when source hash matches", + " --local-files-only Refuse remote model downloads", + " --local-model-path Local model root for Transformers.js", + " --cache-dir Transformers.js cache directory", + " --memory-max-chars Max chars embedded per memory row (default: 8000)", + " --sessions-max-chars Max chars embedded per sessions row (default: 8000)", + "", + "Note: For local TypeScript inference, the practical default is the ONNX export", + " of microsoft/harrier-oss-v1-0.6b. Pass --local-files-only with a local model", + " cache if you want fully offline execution.", + "", + ].join("\n")); +} + +function parseInteger(value: string | undefined, fallback: number): number { + const parsed = Number.parseInt(value ?? "", 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback; +} + +function parseArgs(): Args { + const config = loadConfig(); + const args = process.argv.slice(2); + const opts: Args = { + table: "all", + memoryTable: config?.tableName ?? "memory", + sessionsTable: config?.sessionsTableName ?? 
"sessions", + modelId: DEFAULT_MODEL_ID, + startOffset: 0, + device: "cpu", + batchSize: DEFAULT_BATCH_SIZE, + scanBatchSize: DEFAULT_SCAN_BATCH_SIZE, + force: false, + localFilesOnly: false, + memoryMaxChars: 8_000, + sessionsMaxChars: 8_000, + embeddingColumn: DEFAULT_EMBEDDING_COLUMN, + embeddingModelColumn: "embedding_model", + embeddingSourceHashColumn: "embedding_source_hash", + embeddingUpdatedAtColumn: "embedding_updated_at", + }; + + for (let index = 0; index < args.length; index++) { + switch (args[index]) { + case "--help": + case "-h": + printUsage(); + process.exit(0); + case "--table": { + const value = args[++index]; + if (value === "memory" || value === "sessions" || value === "all") { + opts.table = value; + } else { + throw new Error(`Unsupported --table value: ${value}`); + } + break; + } + case "--memory-table": + opts.memoryTable = args[++index] ?? opts.memoryTable; + break; + case "--sessions-table": + opts.sessionsTable = args[++index] ?? opts.sessionsTable; + break; + case "--model-id": + opts.modelId = args[++index] ?? opts.modelId; + break; + case "--start-offset": + opts.startOffset = Math.max(0, parseInteger(args[++index], 0)); + break; + case "--max-rows": + opts.maxRows = parseInteger(args[++index], 0); + break; + case "--device": + opts.device = args[++index] ?? opts.device; + break; + case "--dtype": + opts.dtype = args[++index] ?? opts.dtype; + break; + case "--batch-size": + opts.batchSize = parseInteger(args[++index], opts.batchSize); + break; + case "--scan-batch-size": + opts.scanBatchSize = parseInteger(args[++index], opts.scanBatchSize); + break; + case "--limit": + opts.limit = parseInteger(args[++index], 0); + break; + case "--force": + opts.force = true; + break; + case "--local-files-only": + opts.localFilesOnly = true; + break; + case "--local-model-path": + opts.localModelPath = args[++index] ?? opts.localModelPath; + break; + case "--cache-dir": + opts.cacheDir = args[++index] ?? 
opts.cacheDir; + break; + case "--memory-max-chars": + opts.memoryMaxChars = parseInteger(args[++index], opts.memoryMaxChars); + break; + case "--sessions-max-chars": + opts.sessionsMaxChars = parseInteger(args[++index], opts.sessionsMaxChars); + break; + default: + throw new Error(`Unknown argument: ${args[index]}`); + } + } + + return opts; +} + +function asString(value: unknown): string { + return typeof value === "string" ? value : value == null ? "" : String(value); +} + +function hasVector(value: unknown): boolean { + return Array.isArray(value) && value.length > 0; +} + +function sqlFloat4Array(values: number[]): string { + return `ARRAY[${values.map((value) => Number.isFinite(value) ? Math.fround(value).toString() : "0").join(", ")}]::float4[]`; +} + +async function ensureSqlColumns(api: DeeplakeApi, tableName: string, specs: SqlColumnSpec[]): Promise { + const table = sqlIdent(tableName); + for (const spec of specs) { + const column = sqlIdent(spec.name); + try { + await api.query(`ALTER TABLE "${table}" ADD COLUMN IF NOT EXISTS "${column}" ${spec.ddl}`); + } catch { + // Older backends may reject IF NOT EXISTS or duplicate adds. + // Continue so repeated runs remain best-effort. + } + } +} + +async function ensureEmbeddingIndex(api: DeeplakeApi, tableName: string, columnName: string): Promise { + const table = sqlIdent(tableName); + const column = sqlIdent(columnName); + const indexName = sqlIdent(`idx_${tableName}_${columnName}`.replace(/[^a-zA-Z0-9_]/g, "_")); + await api.query( + `CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" USING deeplake_index ("${column}")` + ).catch(() => {}); +} + +async function fetchMemoryRows(api: DeeplakeApi, args: Args, offset: number): Promise[]> { + const table = sqlIdent(args.memoryTable); + const remainingRows = args.maxRows ? 
Math.max(0, (args.startOffset + args.maxRows) - offset) : args.scanBatchSize; + const limit = Math.min(args.scanBatchSize, remainingRows); + if (limit <= 0) return []; + return api.query( + `SELECT id, path, filename, summary, description, project, ` + + `"${sqlIdent(args.embeddingSourceHashColumn)}" AS embedding_source_hash, ` + + `"${sqlIdent(args.embeddingModelColumn)}" AS embedding_model ` + + `FROM "${table}" ORDER BY path ASC LIMIT ${limit} OFFSET ${offset}` + ); +} + +async function fetchSessionRows(api: DeeplakeApi, args: Args, offset: number): Promise[]> { + const table = sqlIdent(args.sessionsTable); + const remainingRows = args.maxRows ? Math.max(0, (args.startOffset + args.maxRows) - offset) : args.scanBatchSize; + const limit = Math.min(args.scanBatchSize, remainingRows); + if (limit <= 0) return []; + return api.query( + `SELECT id, path, event_type, speaker, text, turn_summary, source_date_time, turn_index, message ` + + `FROM "${table}" ` + + `ORDER BY path ASC, turn_index ASC, creation_date ASC LIMIT ${limit} OFFSET ${offset}` + ); +} + +async function updateEmbeddingRow( + api: DeeplakeApi, + tableName: string, + args: Args, + id: string, + vector: number[], + sourceHash: string, +): Promise { + const table = sqlIdent(tableName); + const updatedAt = new Date().toISOString(); + await api.query( + `UPDATE "${table}" SET ` + + `"${sqlIdent(args.embeddingColumn)}" = ${sqlFloat4Array(vector)}, ` + + `"${sqlIdent(args.embeddingModelColumn)}" = '${sqlStr(args.modelId)}', ` + + `"${sqlIdent(args.embeddingSourceHashColumn)}" = '${sqlStr(sourceHash)}', ` + + `"${sqlIdent(args.embeddingUpdatedAtColumn)}" = '${sqlStr(updatedAt)}' ` + + `WHERE id = '${sqlStr(id)}'` + ); +} + +async function backfillMemoryTable(api: DeeplakeApi, embedder: HarrierEmbedder, args: Args): Promise<{ updated: number; skipped: number }> { + await ensureSqlColumns(api, args.memoryTable, [ + { name: args.embeddingColumn, ddl: "float4[]" }, + { name: args.embeddingModelColumn, ddl: "TEXT 
NOT NULL DEFAULT ''" }, + { name: args.embeddingSourceHashColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingUpdatedAtColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + ]); + + let updated = 0; + let skipped = 0; + + for (let offset = args.startOffset; ; offset += args.scanBatchSize) { + const rows = await fetchMemoryRows(api, args, offset); + if (rows.length === 0) break; + + const docs: string[] = []; + const ids: string[] = []; + const sourceHashes: string[] = []; + + for (const row of rows) { + const text = buildMemoryEmbeddingText({ + path: asString(row["path"]), + filename: asString(row["filename"]), + summary: asString(row["summary"]), + description: asString(row["description"]), + project: asString(row["project"]), + } satisfies MemoryEmbeddingRow, args.memoryMaxChars); + + if (!text) { + skipped++; + continue; + } + + docs.push(text); + ids.push(asString(row["id"])); + sourceHashes.push(stableEmbeddingSourceHash(text)); + } + + for (let batchStart = 0; batchStart < docs.length; batchStart += args.batchSize) { + const batchDocs = docs.slice(batchStart, batchStart + args.batchSize); + const batchIds = ids.slice(batchStart, batchStart + args.batchSize); + const batchHashes = sourceHashes.slice(batchStart, batchStart + args.batchSize); + const vectors = await embedder.embedDocuments(batchDocs); + + for (let index = 0; index < vectors.length; index++) { + await updateEmbeddingRow(api, args.memoryTable, args, batchIds[index], vectors[index], batchHashes[index]); + updated++; + } + + process.stderr.write(`[memory] updated ${updated}, skipped ${skipped}\n`); + if (args.limit && updated >= args.limit) { + await ensureEmbeddingIndex(api, args.memoryTable, args.embeddingColumn); + return { updated, skipped }; + } + } + } + + await ensureEmbeddingIndex(api, args.memoryTable, args.embeddingColumn); + return { updated, skipped }; +} + +async function backfillSessionsTable(api: DeeplakeApi, embedder: HarrierEmbedder, args: Args): Promise<{ updated: number; 
skipped: number }> { + await ensureSqlColumns(api, args.sessionsTable, [ + { name: args.embeddingColumn, ddl: "float4[]" }, + { name: args.embeddingModelColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingSourceHashColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + { name: args.embeddingUpdatedAtColumn, ddl: "TEXT NOT NULL DEFAULT ''" }, + ]); + + let updated = 0; + let skipped = 0; + + for (let offset = args.startOffset; ; offset += args.scanBatchSize) { + const rows = await fetchSessionRows(api, args, offset); + if (rows.length === 0) break; + + const docs: string[] = []; + const ids: string[] = []; + const sourceHashes: string[] = []; + + for (const row of rows) { + const text = buildSessionEmbeddingText({ + path: asString(row["path"]), + event_type: asString(row["event_type"]), + speaker: asString(row["speaker"]), + text: asString(row["text"]), + turn_summary: asString(row["turn_summary"]), + source_date_time: asString(row["source_date_time"]), + turn_index: Number.isFinite(Number(row["turn_index"])) ? 
Number(row["turn_index"]) : undefined, + message: row["message"], + } satisfies SessionEmbeddingRow, args.sessionsMaxChars); + + if (!text) { + skipped++; + continue; + } + + const sourceHash = stableEmbeddingSourceHash(text); + const existingHash = asString(row["embedding_source_hash"]); + const existingModel = asString(row["embedding_model"]); + if (!args.force && existingHash === sourceHash && existingModel === embedder.modelId) { + skipped++; + continue; + } + + docs.push(text); + ids.push(asString(row["id"])); + sourceHashes.push(sourceHash); + } + + for (let batchStart = 0; batchStart < docs.length; batchStart += args.batchSize) { + const batchDocs = docs.slice(batchStart, batchStart + args.batchSize); + const batchIds = ids.slice(batchStart, batchStart + args.batchSize); + const batchHashes = sourceHashes.slice(batchStart, batchStart + args.batchSize); + const vectors = await embedder.embedDocuments(batchDocs); + + for (let index = 0; index < vectors.length; index++) { + await updateEmbeddingRow(api, args.sessionsTable, args, batchIds[index], vectors[index], batchHashes[index]); + updated++; + } + + process.stderr.write(`[sessions] updated ${updated}, skipped ${skipped}\n`); + if (args.limit && updated >= args.limit) { + await ensureEmbeddingIndex(api, args.sessionsTable, args.embeddingColumn); + return { updated, skipped }; + } + } + } + + await ensureEmbeddingIndex(api, args.sessionsTable, args.embeddingColumn); + return { updated, skipped }; +} + +async function main(): Promise { + const args = parseArgs(); + const creds = loadCredentials(); + const config = loadConfig(); + if (!creds?.token || !config) { + throw new Error("Missing Deeplake credentials. 
Run `deeplake login` first."); + } + + const api = new DeeplakeApi( + config.token, + config.apiUrl, + config.orgId, + config.workspaceId, + config.tableName, + ); + const embedder = new HarrierEmbedder({ + modelId: args.modelId, + cacheDir: args.cacheDir, + localModelPath: args.localModelPath, + localFilesOnly: args.localFilesOnly, + device: args.device, + dtype: args.dtype, + batchSize: args.batchSize, + }); + + if (args.table === "memory" || args.table === "all") { + const result = await backfillMemoryTable(api, embedder, args); + process.stderr.write(`[memory] complete: updated=${result.updated} skipped=${result.skipped}\n`); + } + if (args.table === "sessions" || args.table === "all") { + const result = await backfillSessionsTable(api, embedder, args); + process.stderr.write(`[sessions] complete: updated=${result.updated} skipped=${result.skipped}\n`); + } +} + +main().catch((error: unknown) => { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`[backfill-harrier-embeddings] ${message}\n`); + process.exit(1); +}); diff --git a/src/tools/backfill-locomo-facts.ts b/src/tools/backfill-locomo-facts.ts new file mode 100644 index 0000000..b45c55d --- /dev/null +++ b/src/tools/backfill-locomo-facts.ts @@ -0,0 +1,268 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { appendFileSync } from "node:fs"; +import { basename } from "node:path"; +import { promisify } from "node:util"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi, DeeplakeQueryError, summarizeSql } from "../deeplake-api.js"; +import { + buildMemoryFactPrompt, + parseMemoryFactExtraction, + replaceSessionFacts, +} from "../hooks/memory-facts.js"; +import { findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface Args { + memoryTable: string; + factsTable: string; + entitiesTable: string; + linksTable: string; + pathContains?: string; + 
concurrency: number; + model: string; + clearFacts: boolean; + clearEntities: boolean; + errorLogPath?: string; +} + +interface SummaryRow { + path: string; + summary: string; + project?: string; +} + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + memoryTable: "memory", + factsTable: "memory_facts", + entitiesTable: "memory_entities", + linksTable: "fact_entity_links", + pathContains: undefined, + concurrency: 4, + model: "haiku", + clearFacts: false, + clearEntities: false, + errorLogPath: undefined, + }; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--memory-table": + opts.memoryTable = args[++i] ?? opts.memoryTable; + break; + case "--facts-table": + opts.factsTable = args[++i] ?? opts.factsTable; + break; + case "--entities-table": + opts.entitiesTable = args[++i] ?? opts.entitiesTable; + break; + case "--links-table": + opts.linksTable = args[++i] ?? opts.linksTable; + break; + case "--path-contains": + opts.pathContains = args[++i] ?? opts.pathContains; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i] ?? "4", 10) || 4); + break; + case "--model": + opts.model = args[++i] ?? opts.model; + break; + case "--clear-facts": + opts.clearFacts = true; + break; + case "--clear-entities": + opts.clearEntities = true; + break; + case "--error-log": + opts.errorLogPath = args[++i] ?? opts.errorLogPath; + break; + } + } + return opts; +} + +function extractSummarySourcePath(summary: string): string { + const match = summary.match(/^- \*\*Source\*\*: (.+)$/m); + return match?.[1]?.trim() || ""; +} + +function sessionIdFromSummaryPath(path: string): string { + const base = basename(path).replace(/\.md$/, ""); + return base.endsWith("_summary") ? base.slice(0, -"_summary".length) : base; +} + +function serializeError(error: unknown): Record { + const err = error instanceof Error ? 
error : new Error(String(error)); + const out: Record = { + name: err.name, + message: err.message, + stack: err.stack, + }; + const record = err as Error & Record; + if (typeof record["phase"] === "string") out["phase"] = record["phase"]; + if (typeof record["sessionId"] === "string") out["sessionId"] = record["sessionId"]; + if (typeof record["table"] === "string") out["table"] = record["table"]; + if (typeof record["sql"] === "string") out["sql"] = record["sql"]; + if (error instanceof DeeplakeQueryError) { + out["sqlSummary"] = error.sqlSummary; + out["status"] = error.status; + out["responseBody"] = error.responseBody; + } else if (typeof record["sql"] === "string") { + out["sqlSummary"] = summarizeSql(record["sql"] as string); + } + const cause = record["cause"]; + if (cause instanceof DeeplakeQueryError) { + out["cause"] = { + name: cause.name, + message: cause.message, + sqlSummary: cause.sqlSummary, + status: cause.status, + responseBody: cause.responseBody, + stack: cause.stack, + }; + } else if (cause instanceof Error) { + out["cause"] = { + name: cause.name, + message: cause.message, + stack: cause.stack, + }; + } + return out; +} + +function appendErrorLog(logPath: string | undefined, payload: Record): void { + if (!logPath) return; + appendFileSync(logPath, `${JSON.stringify(payload)}\n`, "utf-8"); +} + +async function generateFacts( + summary: string, + sourcePath: string, + sessionId: string, + project: string, + claudeBin: string, + model: string, +) { + const prompt = buildMemoryFactPrompt({ + summaryText: summary, + sessionId, + sourcePath, + project, + }); + const { stdout } = await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + }, + }); + return parseMemoryFactExtraction(stdout); +} + +async function main(): 
Promise { + const opts = parseArgs(); + const creds = loadCredentials(); + if (!creds?.token || !creds.orgId) { + throw new Error("Missing Deeplake credentials. Run auth first."); + } + + const workspaceId = creds.workspaceId ?? "default"; + const apiUrl = process.env["HIVEMIND_API_URL"] ?? process.env["DEEPLAKE_API_URL"] ?? creds.apiUrl ?? "https://api.deeplake.ai"; + const api = new DeeplakeApi(creds.token, apiUrl, creds.orgId, workspaceId, opts.memoryTable); + await api.ensureFactsTable(opts.factsTable); + await api.ensureEntitiesTable(opts.entitiesTable); + await api.ensureFactEntityLinksTable(opts.linksTable); + + if (opts.clearFacts) { + await api.query(`DELETE FROM "${opts.factsTable}"`); + await api.query(`DELETE FROM "${opts.linksTable}"`); + } + if (opts.clearEntities) { + await api.query(`DELETE FROM "${opts.entitiesTable}"`); + } + + const summaryRows = await api.query( + `SELECT path, summary, project FROM "${opts.memoryTable}" WHERE path LIKE '/summaries/locomo/%' ORDER BY path ASC`, + ); + const summaries: SummaryRow[] = summaryRows.map((row) => ({ + path: String(row["path"] ?? ""), + summary: String(row["summary"] ?? ""), + project: row["project"] == null ? 
undefined : String(row["project"]), + })) + .filter((row) => row.path && row.summary) + .filter((row) => !opts.pathContains || row.path.includes(opts.pathContains)); + + const claudeBin = findClaudeBin(); + let nextIndex = 0; + let completed = 0; + let failures = 0; + let totalFacts = 0; + + async function worker(): Promise { + while (true) { + const index = nextIndex++; + if (index >= summaries.length) return; + const row = summaries[index]; + const sessionId = sessionIdFromSummaryPath(row.path); + const sourcePath = extractSummarySourcePath(row.summary) || `/sessions/${sessionId}.json`; + try { + const extraction = await generateFacts( + row.summary, + sourcePath, + sessionId, + row.project || "locomo", + claudeBin, + opts.model, + ); + const result = await replaceSessionFacts({ + query: (sql) => api.query(sql), + factsTable: opts.factsTable, + entitiesTable: opts.entitiesTable, + linksTable: opts.linksTable, + sessionId, + userName: "locomo", + project: row.project || "locomo", + agent: "claude_code", + sourcePath, + extraction, + }); + totalFacts += result.facts; + completed += 1; + process.stdout.write(`facts ${completed}/${summaries.length}: ${sessionId} facts=${result.facts} entities=${result.entities} links=${result.links}\n`); + } catch (error) { + failures += 1; + const payload = { + path: row.path, + sessionId, + sourcePath, + failureAt: new Date().toISOString(), + error: serializeError(error), + }; + appendErrorLog(opts.errorLogPath, payload); + process.stderr.write(`FAIL ${row.path}: ${error instanceof Error ? error.message : String(error)}\n`); + } + } + } + + await Promise.all(Array.from({ length: opts.concurrency }, () => worker())); + process.stdout.write(`Done. facts_summaries=${completed} failed=${failures} total_facts=${totalFacts}\n`); +} + +main().catch((error) => { + process.stderr.write(`${error instanceof Error ? error.stack ?? 
error.message : String(error)}\n`); + process.exit(1); +}); diff --git a/src/tools/backfill-locomo-graph.ts b/src/tools/backfill-locomo-graph.ts new file mode 100644 index 0000000..63d1eb3 --- /dev/null +++ b/src/tools/backfill-locomo-graph.ts @@ -0,0 +1,525 @@ +#!/usr/bin/env node + +import { execFile } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import { appendFileSync, writeFileSync } from "node:fs"; +import { promisify } from "node:util"; +import { basename } from "node:path"; +import { loadCredentials } from "../commands/auth.js"; +import { DeeplakeApi, DeeplakeQueryError, summarizeSql } from "../deeplake-api.js"; +import { + buildGraphNodeId, + buildKnowledgeGraphPrompt, + type GraphExtraction, + parseGraphExtraction, +} from "../hooks/knowledge-graph.js"; +import { buildSummaryBlurb } from "../utils/summary-format.js"; +import { esc } from "../hooks/upload-summary.js"; +import { findClaudeBin } from "../hooks/spawn-wiki-worker.js"; + +const execFileAsync = promisify(execFile); + +interface Args { + memoryTable: string; + graphNodesTable: string; + graphEdgesTable: string; + concurrency: number; + model: string; + clearGraph: boolean; + errorLogPath?: string; +} + +interface SummaryRow { + path: string; + summary: string; + project?: string; +} + +interface AggregateNode { + nodeId: string; + canonicalName: string; + nodeType: string; + aliases: Set; + summaries: Set; + sourceSessionIds: Set; + sourcePaths: Set; + representativeSessionId: string; + representativeSourcePath: string; +} + +interface AggregateEdge { + edgeId: string; + sourceNodeId: string; + targetNodeId: string; + relation: string; + summaries: Set; + evidences: Set; + sourceSessionIds: Set; + sourcePaths: Set; + representativeSessionId: string; + representativeSourcePath: string; +} + +function parseArgs(): Args { + const args = process.argv.slice(2); + const opts: Args = { + memoryTable: "memory", + graphNodesTable: "graph_nodes", + graphEdgesTable: "graph_edges", 
+ concurrency: 4, + model: "haiku", + clearGraph: false, + errorLogPath: undefined, + }; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--memory-table": + opts.memoryTable = args[++i] ?? opts.memoryTable; + break; + case "--graph-nodes-table": + opts.graphNodesTable = args[++i] ?? opts.graphNodesTable; + break; + case "--graph-edges-table": + opts.graphEdgesTable = args[++i] ?? opts.graphEdgesTable; + break; + case "--concurrency": + opts.concurrency = Math.max(1, parseInt(args[++i] ?? "4", 10) || 4); + break; + case "--model": + opts.model = args[++i] ?? opts.model; + break; + case "--clear-graph": + opts.clearGraph = true; + break; + case "--error-log": + opts.errorLogPath = args[++i] ?? opts.errorLogPath; + break; + } + } + return opts; +} + +function extractSummarySourcePath(summary: string): string { + const match = summary.match(/^- \*\*Source\*\*: (.+)$/m); + return match?.[1]?.trim() || ""; +} + +function sessionIdFromSummaryPath(path: string): string { + const base = basename(path).replace(/\.md$/, ""); + return base.endsWith("_summary") ? base.slice(0, -"_summary".length) : base; +} + +async function generateGraph(summary: string, sourcePath: string, sessionId: string, project: string, claudeBin: string, model: string) { + const prompt = buildKnowledgeGraphPrompt({ + summaryText: summary, + sessionId, + sourcePath, + project, + }); + const { stdout } = await execFileAsync(claudeBin, [ + "-p", + prompt, + "--no-session-persistence", + "--model", + model, + "--permission-mode", + "bypassPermissions", + ], { + timeout: 120_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + HIVEMIND_CAPTURE: "false", + HIVEMIND_WIKI_WORKER: "1", + }, + }); + return parseGraphExtraction(stdout); +} + +function serializeError(error: unknown): Record { + const err = error instanceof Error ? 
error : new Error(String(error)); + const out: Record = { + name: err.name, + message: err.message, + stack: err.stack, + }; + const record = err as Error & Record; + if (typeof record["phase"] === "string") out["phase"] = record["phase"]; + if (typeof record["sessionId"] === "string") out["sessionId"] = record["sessionId"]; + if (typeof record["table"] === "string") out["table"] = record["table"]; + if (typeof record["sql"] === "string") out["sql"] = record["sql"]; + if (error instanceof DeeplakeQueryError) { + out["sqlSummary"] = error.sqlSummary; + out["status"] = error.status; + out["responseBody"] = error.responseBody; + } else if (typeof record["sql"] === "string") { + out["sqlSummary"] = summarizeSql(record["sql"] as string); + } + const cause = record["cause"]; + if (cause instanceof DeeplakeQueryError) { + out["cause"] = { + name: cause.name, + message: cause.message, + sqlSummary: cause.sqlSummary, + status: cause.status, + responseBody: cause.responseBody, + stack: cause.stack, + }; + } else if (cause instanceof Error) { + out["cause"] = { + name: cause.name, + message: cause.message, + stack: cause.stack, + }; + } else if (cause != null) { + out["cause"] = String(cause); + } + return out; +} + +function appendErrorLog(logPath: string | undefined, payload: Record): void { + if (!logPath) return; + appendFileSync(logPath, `${JSON.stringify(payload)}\n`, "utf-8"); +} + +const NODE_TYPE_PRIORITY = [ + "person", + "organization", + "place", + "event", + "project", + "artifact", + "tool", + "file", + "goal", + "status", + "preference", + "concept", + "other", +]; + +function nodeTypeRank(value: string): number { + const idx = NODE_TYPE_PRIORITY.indexOf(value); + return idx === -1 ? NODE_TYPE_PRIORITY.length : idx; +} + +function preferNodeType(a: string, b: string): string { + return nodeTypeRank(a) <= nodeTypeRank(b) ? 
a : b; +} + +function pushLimited(set: Set, value: string, max = 8): void { + const trimmed = value.trim(); + if (!trimmed) return; + if (set.has(trimmed)) return; + if (set.size >= max) return; + set.add(trimmed); +} + +function mergeSummarySet(set: Set): string { + return [...set].join(" | "); +} + +function chooseRepresentative(set: Set, fallback: string): string { + return [...set].at(-1) || fallback; +} + +function resolveNodeId(name: string, aliases: string[], aliasMap: Map): string { + const candidates = [name, ...aliases] + .map((value) => value.trim()) + .filter(Boolean) + .map((value) => buildGraphNodeId(value)); + for (const candidate of candidates) { + const existing = aliasMap.get(candidate); + if (existing) return existing; + } + return buildGraphNodeId(name); +} + +function mergeGraphIntoAggregate(args: { + graph: GraphExtraction; + sessionId: string; + sourcePath: string; + nodes: Map; + edges: Map; + aliasMap: Map; +}): void { + const localNodeIds = new Map(); + const ensureNode = (rawName: string, type = "other", summary = "", aliases: string[] = []): string => { + const name = rawName.trim(); + if (!name) return buildGraphNodeId("unknown"); + const nodeId = resolveNodeId(name, aliases, args.aliasMap); + const existing = args.nodes.get(nodeId); + if (existing) { + existing.nodeType = preferNodeType(existing.nodeType, type || "other"); + pushLimited(existing.summaries, summary); + existing.sourceSessionIds.add(args.sessionId); + existing.sourcePaths.add(args.sourcePath); + existing.representativeSessionId = args.sessionId; + existing.representativeSourcePath = args.sourcePath; + for (const alias of [name, ...aliases]) { + const trimmed = alias.trim(); + if (!trimmed) continue; + existing.aliases.add(trimmed); + args.aliasMap.set(buildGraphNodeId(trimmed), nodeId); + } + } else { + const node: AggregateNode = { + nodeId, + canonicalName: name, + nodeType: type || "other", + aliases: new Set(), + summaries: new Set(), + sourceSessionIds: new 
Set([args.sessionId]), + sourcePaths: new Set([args.sourcePath]), + representativeSessionId: args.sessionId, + representativeSourcePath: args.sourcePath, + }; + pushLimited(node.summaries, summary); + for (const alias of [name, ...aliases]) { + const trimmed = alias.trim(); + if (!trimmed) continue; + node.aliases.add(trimmed); + args.aliasMap.set(buildGraphNodeId(trimmed), nodeId); + } + args.nodes.set(nodeId, node); + } + localNodeIds.set(name, nodeId); + return nodeId; + }; + + for (const node of args.graph.nodes) { + ensureNode(node.name, node.type || "other", node.summary || "", node.aliases || []); + } + for (const edge of args.graph.edges) { + const sourceNodeId = localNodeIds.get(edge.source.trim()) || ensureNode(edge.source); + const targetNodeId = localNodeIds.get(edge.target.trim()) || ensureNode(edge.target); + const edgeId = `${sourceNodeId}:${edge.relation}:${targetNodeId}`; + const existing = args.edges.get(edgeId); + if (existing) { + pushLimited(existing.summaries, edge.summary || `${edge.source} ${edge.relation} ${edge.target}`); + pushLimited(existing.evidences, edge.evidence || ""); + existing.sourceSessionIds.add(args.sessionId); + existing.sourcePaths.add(args.sourcePath); + existing.representativeSessionId = args.sessionId; + existing.representativeSourcePath = args.sourcePath; + } else { + const aggregateEdge: AggregateEdge = { + edgeId, + sourceNodeId, + targetNodeId, + relation: edge.relation, + summaries: new Set(), + evidences: new Set(), + sourceSessionIds: new Set([args.sessionId]), + sourcePaths: new Set([args.sourcePath]), + representativeSessionId: args.sessionId, + representativeSourcePath: args.sourcePath, + }; + pushLimited(aggregateEdge.summaries, edge.summary || `${edge.source} ${edge.relation} ${edge.target}`); + pushLimited(aggregateEdge.evidences, edge.evidence || ""); + args.edges.set(edgeId, aggregateEdge); + } + } +} + +async function insertAggregatedGraph(args: { + api: DeeplakeApi; + nodesTable: string; + edgesTable: 
string; + project: string; + agent: string; + nodes: Map; + edges: Map; +}): Promise { + const ts = new Date().toISOString(); + const nodePath = "/graphs/nodes/locomo/global.jsonl"; + const edgePath = "/graphs/edges/locomo/global.jsonl"; + const nodeFilename = "global.jsonl"; + const edgeFilename = "global.jsonl"; + + await args.api.query(`DELETE FROM "${args.nodesTable}"`); + await args.api.query(`DELETE FROM "${args.edgesTable}"`); + + const nodeRows = [...args.nodes.values()].map((node) => { + const aliases = [...node.aliases].filter((alias) => alias !== node.canonicalName); + const sourceSessionIds = [...node.sourceSessionIds]; + const sourcePaths = [...node.sourcePaths]; + const summary = mergeSummarySet(node.summaries) || buildSummaryBlurb(`# Graph Node\n\n${node.canonicalName}`); + const searchText = [ + node.canonicalName, + node.nodeType, + ...aliases, + ...node.summaries, + ...sourceSessionIds, + ...sourcePaths, + ].join(" | "); + return ( + `('${randomUUID()}', '${esc(nodePath)}', '${esc(nodeFilename)}', '${esc(node.nodeId)}', ` + + `'${esc(node.canonicalName)}', '${esc(node.nodeType)}', E'${esc(summary)}', E'${esc(searchText)}', ` + + `'${esc(aliases.join(", "))}', '${esc(chooseRepresentative(node.sourceSessionIds, node.representativeSessionId))}', ` + + `'${esc(sourceSessionIds.join(" || "))}', '${esc(chooseRepresentative(node.sourcePaths, node.representativeSourcePath))}', ` + + `'${esc(sourcePaths.join(" || "))}', 'locomo', 'application/json', ` + + `${Buffer.byteLength(searchText, "utf-8")}, '${esc(args.project)}', E'${esc(buildSummaryBlurb(summary))}', ` + + `'${esc(args.agent)}', '${ts}', '${ts}')` + ); + }); + + for (let i = 0; i < nodeRows.length; i += 100) { + const chunk = nodeRows.slice(i, i + 100); + if (chunk.length === 0) continue; + await args.api.query( + `INSERT INTO "${args.nodesTable}" ` + + `(id, path, filename, node_id, canonical_name, node_type, summary, search_text, aliases, source_session_id, source_session_ids, source_path, 
source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${chunk.join(", ")}` + ); + } + + const edgeRows = [...args.edges.values()].map((edge) => { + const sourceSessionIds = [...edge.sourceSessionIds]; + const sourcePaths = [...edge.sourcePaths]; + const summary = mergeSummarySet(edge.summaries) || edge.edgeId; + const evidence = mergeSummarySet(edge.evidences); + const searchText = [ + edge.sourceNodeId, + edge.relation, + edge.targetNodeId, + ...edge.summaries, + ...edge.evidences, + ...sourceSessionIds, + ...sourcePaths, + ].join(" | "); + return ( + `('${randomUUID()}', '${esc(edgePath)}', '${esc(edgeFilename)}', '${esc(edge.edgeId)}', ` + + `'${esc(edge.sourceNodeId)}', '${esc(edge.targetNodeId)}', '${esc(edge.relation)}', E'${esc(summary)}', ` + + `E'${esc(evidence)}', E'${esc(searchText)}', '${esc(chooseRepresentative(edge.sourceSessionIds, edge.representativeSessionId))}', ` + + `'${esc(sourceSessionIds.join(" || "))}', '${esc(chooseRepresentative(edge.sourcePaths, edge.representativeSourcePath))}', ` + + `'${esc(sourcePaths.join(" || "))}', 'locomo', 'application/json', ` + + `${Buffer.byteLength(searchText, "utf-8")}, '${esc(args.project)}', E'${esc(buildSummaryBlurb(summary))}', ` + + `'${esc(args.agent)}', '${ts}', '${ts}')` + ); + }); + + for (let i = 0; i < edgeRows.length; i += 100) { + const chunk = edgeRows.slice(i, i + 100); + if (chunk.length === 0) continue; + await args.api.query( + `INSERT INTO "${args.edgesTable}" ` + + `(id, path, filename, edge_id, source_node_id, target_node_id, relation, summary, evidence, search_text, source_session_id, source_session_ids, source_path, source_paths, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ${chunk.join(", ")}` + ); + } +} + +async function withConcurrency(items: T[], concurrency: number, fn: (item: T, idx: number) => Promise) { + let next = 0; + let running = 0; + await new 
Promise((resolve) => { + function launch() { + while (running < concurrency && next < items.length) { + const idx = next++; + running++; + fn(items[idx], idx).finally(() => { + running--; + if (next >= items.length && running === 0) resolve(); + else launch(); + }); + } + } + launch(); + }); +} + +async function main(): Promise { + const opts = parseArgs(); + const errorLogPath = opts.errorLogPath || `/tmp/locomo-graph-backfill-errors-${Date.now()}.jsonl`; + writeFileSync(errorLogPath, "", "utf-8"); + console.log(`error_log=${errorLogPath}`); + const creds = loadCredentials(); + if (!creds?.token) throw new Error("No Deeplake credentials found. Run hivemind login first."); + + const api = new DeeplakeApi( + creds.token, + creds.apiUrl ?? "https://api.deeplake.ai", + creds.orgId, + creds.workspaceId ?? "default", + opts.memoryTable, + ); + + await api.ensureGraphNodesTable(opts.graphNodesTable); + await api.ensureGraphEdgesTable(opts.graphEdgesTable); + if (opts.clearGraph) { + await api.query(`DELETE FROM "${opts.graphNodesTable}"`); + await api.query(`DELETE FROM "${opts.graphEdgesTable}"`); + } + + const summaryRows = (await api.query( + `SELECT path, summary, project FROM "${opts.memoryTable}" WHERE path LIKE '/summaries/locomo/%' ORDER BY path` + )) + .filter((row) => typeof row["path"] === "string" && typeof row["summary"] === "string") + .map((row) => ({ + path: row["path"] as string, + summary: row["summary"] as string, + project: typeof row["project"] === "string" ? 
row["project"] as string : undefined, + })) as SummaryRow[]; + const claudeBin = findClaudeBin(); + const aggregateNodes = new Map(); + const aggregateEdges = new Map(); + const aliasMap = new Map(); + + let completed = 0; + let failed = 0; + await withConcurrency(summaryRows, opts.concurrency, async (row) => { + const sessionId = sessionIdFromSummaryPath(row.path); + const sourcePath = extractSummarySourcePath(row.summary) || `/sessions/${sessionId}.jsonl`; + try { + const graph = await generateGraph(row.summary, sourcePath, sessionId, row.project || "locomo", claudeBin, opts.model); + mergeGraphIntoAggregate({ + graph, + sessionId, + sourcePath, + nodes: aggregateNodes, + edges: aggregateEdges, + aliasMap, + }); + completed++; + if (completed % 10 === 0 || completed === summaryRows.length) { + console.log(`graph ${completed}/${summaryRows.length}`); + } + } catch (error) { + failed++; + const serialized = serializeError(error); + appendErrorLog(errorLogPath, { + ts: new Date().toISOString(), + summaryPath: row.path, + sessionId, + sourcePath, + project: row.project || "locomo", + ...serialized, + }); + const phase = typeof serialized["phase"] === "string" ? ` phase=${serialized["phase"]}` : ""; + const status = typeof serialized["status"] === "number" ? ` status=${serialized["status"]}` : ""; + const sqlSummary = typeof serialized["sqlSummary"] === "string" ? ` sql=${serialized["sqlSummary"]}` : ""; + console.error(`FAIL ${row.path}:${phase}${status}${sqlSummary} ${serialized["message"]}`); + } + }); + + if (failed === 0 || completed > 0) { + await insertAggregatedGraph({ + api, + nodesTable: opts.graphNodesTable, + edgesTable: opts.graphEdgesTable, + project: "locomo", + agent: "claude_code", + nodes: aggregateNodes, + edges: aggregateEdges, + }); + console.log(`graph_rows nodes=${aggregateNodes.size} edges=${aggregateEdges.size}`); + } + + console.log(`Done. 
graph_summaries=${completed} failed=${failed}`); +} + +main().catch((error) => { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}); diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..1c54df7 --- /dev/null +++ b/uv.lock @@ -0,0 +1,1004 @@ +version = 1 +revision = 2 +requires-python = ">=3.11" + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + +[[package]] +name = "click" +version = "8.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/75/31212c6bf2503fdf920d87fee5d7a86a2e3bcf444984126f13d8e4016804/click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5", size = 302856, upload-time = "2026-04-03T19:14:45.118Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/20/71885d8b97d4f3dde17b1fdb92dbd4908b00541c5a3379787137285f602e/click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d", size = 108379, upload-time = "2026-04-03T19:14:43.505Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = 
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cuda-bindings" +version = "13.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/3a8241c6e19483ac1f1dcf5c10238205dcb8a6e9d0d4d4709240dff28ff4/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:721104c603f059780d287969be3d194a18d0cc3b713ed9049065a1107706759d", size = 5730273, upload-time = "2026-03-11T00:12:37.18Z" }, + { url = "https://files.pythonhosted.org/packages/e9/94/2748597f47bb1600cd466b20cab4159f1530a3a33fe7f70fee199b3abb9e/cuda_bindings-13.2.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1eba9504ac70667dd48313395fe05157518fd6371b532790e96fbb31bbb5a5e1", size = 6313924, upload-time = "2026-03-11T00:12:39.462Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/b2589d68acf7e3d63e2be330b84bc25712e97ed799affbca7edd7eae25d6/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e865447abfb83d6a98ad5130ed3c70b1fc295ae3eeee39fd07b4ddb0671b6788", size = 5722404, upload-time = "2026-03-11T00:12:44.041Z" }, + { url = "https://files.pythonhosted.org/packages/1f/92/f899f7bbb5617bb65ec52a6eac1e9a1447a86b916c4194f8a5001b8cde0c/cuda_bindings-13.2.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46d8776a55d6d5da9dd6e9858fba2efcda2abe6743871dee47dd06eb8cb6d955", size = 6320619, upload-time = "2026-03-11T00:12:45.939Z" }, + { url = "https://files.pythonhosted.org/packages/df/93/eef988860a3ca985f82c4f3174fc0cdd94e07331ba9a92e8e064c260337f/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6629ca2df6f795b784752409bcaedbd22a7a651b74b56a165ebc0c9dcbd504d0", size = 5614610, upload-time 
= "2026-03-11T00:12:50.337Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/6db3aba46864aee357ab2415135b3fe3da7e9f1fa0221fa2a86a5968099c/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dca0da053d3b4cc4869eff49c61c03f3c5dbaa0bcd712317a358d5b8f3f385d", size = 6149914, upload-time = "2026-03-11T00:12:52.374Z" }, + { url = "https://files.pythonhosted.org/packages/c0/87/87a014f045b77c6de5c8527b0757fe644417b184e5367db977236a141602/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6464b30f46692d6c7f65d4a0e0450d81dd29de3afc1bb515653973d01c2cd6e", size = 5685673, upload-time = "2026-03-11T00:12:56.371Z" }, + { url = "https://files.pythonhosted.org/packages/ee/5e/c0fe77a73aaefd3fff25ffaccaac69c5a63eafdf8b9a4c476626ef0ac703/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4af9f3e1be603fa12d5ad6cfca7844c9d230befa9792b5abdf7dd79979c3626", size = 6191386, upload-time = "2026-03-11T00:12:58.965Z" }, + { url = "https://files.pythonhosted.org/packages/5f/58/ed2c3b39c8dd5f96aa7a4abef0d47a73932c7a988e30f5fa428f00ed0da1/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df850a1ff8ce1b3385257b08e47b70e959932f5f432d0a4e46a355962b4e4771", size = 5507469, upload-time = "2026-03-11T00:13:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/1f/01/0c941b112ceeb21439b05895eace78ca1aa2eaaf695c8521a068fd9b4c00/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8a16384c6494e5485f39314b0b4afb04bee48d49edb16d5d8593fd35bbd231b", size = 6059693, upload-time = "2026-03-11T00:13:06.003Z" }, +] + +[[package]] +name = "cuda-pathfinder" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d3/d6/ac63065d33dd700fee7ebd7d287332401b54e31b9346e142f871e1f0b116/cuda_pathfinder-1.5.3-py3-none-any.whl", hash = "sha256:dff021123aedbb4117cc7ec81717bbfe198fb4e8b5f1ee57e0e084fec5c8577d", size = 49991, upload-time = "2026-04-14T20:09:27.037Z" }, +] + +[[package]] +name = "cuda-toolkit" +version = "13.0.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = "2025-12-19T23:24:07.328Z" }, +] + +[package.optional-dependencies] +cublas = [ + { name = "nvidia-cublas", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cudart = [ + { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufft = [ + { name = "nvidia-cufft", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufile = [ + { name = "nvidia-cufile", marker = "sys_platform == 'linux'" }, +] +cupti = [ + { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +curand = [ + { name = "nvidia-curand", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusolver = [ + { name = "nvidia-cusolver", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusparse = [ + { name = "nvidia-cusparse", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvjitlink = [ + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvrtc = [ + { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvtx = [ + { name = "nvidia-nvtx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] + +[[package]] +name = "filelock" +version = "3.29.0" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = 
"2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = 
"2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + +[[package]] +name = "hivemind-harrier-backfill" +version = "0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "numpy" }, + { name = "safetensors" }, + { name = "torch" }, + { name = "transformers" }, +] + +[package.metadata] +requires-dist = [ + { name = "numpy", specifier = ">=1.26" }, + { name = "safetensors", specifier = ">=0.4" }, + { name = "torch", specifier = ">=2.4" }, + { name = "transformers", specifier = ">=4.57" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = 
"2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, + { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, + { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, + { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, + { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, + { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + 
{ url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = 
"sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/c6/4218570d8c8ecc9704b5157a3348e486e84ef4be0ed3e38218ab473c83d2/numpy-2.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f983334aea213c99992053ede6168500e5f086ce74fbc4acc3f2b00f5762e9db", size = 16976799, upload-time = "2026-03-29T13:18:15.438Z" }, + { url = "https://files.pythonhosted.org/packages/dd/92/b4d922c4a5f5dab9ed44e6153908a5c665b71acf183a83b93b690996e39b/numpy-2.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72944b19f2324114e9dc86a159787333b77874143efcf89a5167ef83cfee8af0", size = 14971552, upload-time = "2026-03-29T13:18:18.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/dc/df98c095978fa6ee7b9a9387d1d58cbb3d232d0e69ad169a4ce784bde4fd/numpy-2.4.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:86b6f55f5a352b48d7fbfd2dbc3d5b780b2d79f4d3c121f33eb6efb22e9a2015", size = 5476566, upload-time = "2026-03-29T13:18:21.532Z" }, + { url = "https://files.pythonhosted.org/packages/28/34/b3fdcec6e725409223dd27356bdf5a3c2cc2282e428218ecc9cb7acc9763/numpy-2.4.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = 
"sha256:ba1f4fc670ed79f876f70082eff4f9583c15fb9a4b89d6188412de4d18ae2f40", size = 6806482, upload-time = "2026-03-29T13:18:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/63417c13aa35d57bee1337c67446761dc25ea6543130cf868eace6e8157b/numpy-2.4.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a87ec22c87be071b6bdbd27920b129b94f2fc964358ce38f3822635a3e2e03d", size = 15973376, upload-time = "2026-03-29T13:18:26.677Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/9fcb7e0e69cef59cf10c746b84f7d58b08bc66a6b7d459783c5a4f6101a6/numpy-2.4.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df3775294accfdd75f32c74ae39fcba920c9a378a2fc18a12b6820aa8c1fb502", size = 16925137, upload-time = "2026-03-29T13:18:30.14Z" }, + { url = "https://files.pythonhosted.org/packages/7e/43/80020edacb3f84b9efdd1591120a4296462c23fd8db0dde1666f6ef66f13/numpy-2.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d4e437e295f18ec29bc79daf55e8a47a9113df44d66f702f02a293d93a2d6dd", size = 17329414, upload-time = "2026-03-29T13:18:33.733Z" }, + { url = "https://files.pythonhosted.org/packages/fd/06/af0658593b18a5f73532d377188b964f239eb0894e664a6c12f484472f97/numpy-2.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6aa3236c78803afbcb255045fbef97a9e25a1f6c9888357d205ddc42f4d6eba5", size = 18658397, upload-time = "2026-03-29T13:18:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ce/13a09ed65f5d0ce5c7dd0669250374c6e379910f97af2c08c57b0608eee4/numpy-2.4.4-cp311-cp311-win32.whl", hash = "sha256:30caa73029a225b2d40d9fae193e008e24b2026b7ee1a867b7ee8d96ca1a448e", size = 6239499, upload-time = "2026-03-29T13:18:40.372Z" }, + { url = "https://files.pythonhosted.org/packages/bd/63/05d193dbb4b5eec1eca73822d80da98b511f8328ad4ae3ca4caf0f4db91d/numpy-2.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6bbe4eb67390b0a0265a2c25458f6b90a409d5d069f1041e6aff1e27e3d9a79e", size = 12614257, upload-time = 
"2026-03-29T13:18:42.95Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/8168052f080c26fa984c413305012be54741c9d0d74abd7fbeeccae3889f/numpy-2.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:fcfe2045fd2e8f3cb0ce9d4ba6dba6333b8fa05bb8a4939c908cd43322d14c7e", size = 10486775, upload-time = "2026-03-29T13:18:45.835Z" }, + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, 
upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/6b/33/8fae8f964a4f63ed528264ddf25d2b683d0b663e3cba26961eb838a7c1bd/numpy-2.4.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:58c8b5929fcb8287cbd6f0a3fae19c6e03a5c48402ae792962ac465224a629a4", size = 16854491, upload-time = "2026-03-29T13:21:38.03Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/d0/1aabee441380b981cf8cdda3ae7a46aa827d1b5a8cce84d14598bc94d6d9/numpy-2.4.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:eea7ac5d2dce4189771cedb559c738a71512768210dc4e4753b107a2048b3d0e", size = 14895830, upload-time = "2026-03-29T13:21:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b8/aafb0d1065416894fccf4df6b49ef22b8db045187949545bced89c034b8e/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:51fc224f7ca4d92656d5a5eb315f12eb5fe2c97a66249aa7b5f562528a3be38c", size = 5400927, upload-time = "2026-03-29T13:21:44.747Z" }, + { url = "https://files.pythonhosted.org/packages/d6/77/063baa20b08b431038c7f9ff5435540c7b7265c78cf56012a483019ca72d/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:28a650663f7314afc3e6ec620f44f333c386aad9f6fc472030865dc0ebb26ee3", size = 6715557, upload-time = "2026-03-29T13:21:47.406Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a8/379542d45a14f149444c5c4c4e7714707239ce9cc1de8c2803958889da14/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19710a9ca9992d7174e9c52f643d4272dcd1558c5f7af7f6f8190f633bd651a7", size = 15804253, upload-time = "2026-03-29T13:21:50.753Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c8/f0a45426d6d21e7ea3310a15cf90c43a14d9232c31a837702dba437f3373/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b2aec6af35c113b05695ebb5749a787acd63cafc83086a05771d1e1cd1e555f", size = 16753552, upload-time = "2026-03-29T13:21:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/f4c001f4714c3ad9ce037e18cf2b9c64871a84951eaa0baf683a9ca9301c/numpy-2.4.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2cf083b324a467e1ab358c105f6cad5ea950f50524668a80c486ff1db24e119", size = 12509075, upload-time = "2026-03-29T13:21:57.644Z" }, +] + +[[package]] +name = "nvidia-cublas" +version = "13.1.0.3" 
+source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/a5/fce49e2ae977e0ccc084e5adafceb4f0ac0c8333cb6863501618a7277f67/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c86fc7f7ae36d7528288c5d88098edcb7b02c633d262e7ddbb86b0ad91be5df2", size = 542851226, upload-time = "2025-10-09T08:59:04.818Z" }, + { url = "https://files.pythonhosted.org/packages/e7/44/423ac00af4dd95a5aeb27207e2c0d9b7118702149bf4704c3ddb55bb7429/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:ee8722c1f0145ab246bccb9e452153b5e0515fd094c3678df50b2a0888b8b171", size = 423133236, upload-time = "2025-10-09T08:59:32.536Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti" +version = "13.0.85" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc" +version = "13.0.88" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime" +version = "13.0.96" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" }, + { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu13" +version = "9.19.0.56" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/84/26025437c1e6b61a707442184fa0c03d083b661adf3a3eecfd6d21677740/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:6ed29ffaee1176c612daf442e4dd6cfeb6a0caa43ddcbeb59da94953030b1be4", size = 433781201, upload-time = "2026-02-03T20:40:53.805Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/0b4b932655d17a6da1b92fa92ab12844b053bb2ac2475e179ba6f043da1e/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:d20e1734305e9d68889a96e3f35094d733ff1f83932ebe462753973e53a572bf", size = 366066321, upload-time = "2026-02-03T20:44:52.837Z" }, 
+] + +[[package]] +name = "nvidia-cufft" +version = "12.0.0.61" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" }, +] + +[[package]] +name = "nvidia-cufile" +version = "1.15.1.6" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" }, + { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" }, +] + +[[package]] +name = "nvidia-curand" +version = "10.4.0.35" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = 
"sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" }, +] + +[[package]] +name = "nvidia-cusolver" +version = "12.0.4.66" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas" }, + { name = "nvidia-cusparse" }, + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" }, + { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" }, +] + +[[package]] +name = "nvidia-cusparse" +version = "12.6.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu13" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/10/8dcd1175260706a2fc92a16a52e306b71d4c1ea0b0cc4a9484183399818a/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:400c6ed1cf6780fc6efedd64ec9f1345871767e6a1a0a552a1ea0578117ea77c", size = 220791277, upload-time = "2025-08-13T19:22:40.982Z" }, + { url = "https://files.pythonhosted.org/packages/fd/53/43b0d71f4e702fa9733f8b4571fdca50a8813f1e450b656c239beff12315/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25e30a8a7323935d4ad0340b95a0b69926eee755767e8e0b1cf8dd85b197d3fd", size = 169884119, upload-time = "2025-08-13T19:23:41.967Z" }, +] + +[[package]] +name = "nvidia-nccl-cu13" +version = "2.28.9" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/55/1920646a2e43ffd4fc958536b276197ed740e9e0c54105b4bb3521591fc7/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:01c873ba1626b54caa12272ed228dc5b2781545e0ae8ba3f432a8ef1c6d78643", size = 196561677, upload-time = "2025-11-18T05:49:03.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b4/878fefaad5b2bcc6fcf8d474a25e3e3774bc5133e4b58adff4d0bca238bc/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:e4553a30f34195f3fa1da02a6da3d6337d28f2003943aa0a3d247bbc25fefc42", size = 196493177, upload-time = "2025-11-18T05:49:17.677Z" }, +] + +[[package]] +name = "nvidia-nvjitlink" +version = "13.0.88" +source = { registry = 
"https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu13" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" }, + { url = "https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" }, +] + +[[package]] +name = "nvidia-nvtx" +version = "13.0.85" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = 
"2025-09-04T08:29:01.761Z" }, + { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, +] + +[[package]] +name = "packaging" +version = "26.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/de/0d2b39fb4af88a0258f3bac87dfcbb48e73fbdea4a2ed0e2213f9a4c2f9a/packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de", size = 215519, upload-time = "2026-04-14T21:12:49.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, + { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, + { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, + { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, + { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, + { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "regex" +version = "2026.4.4" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/3a246dbf05666918bd3664d9d787f84a9108f6f43cc953a077e4a7dfdb7e/regex-2026.4.4.tar.gz", hash = "sha256:e08270659717f6973523ce3afbafa53515c4dc5dcad637dc215b6fd50f689423", size = 416000, upload-time = "2026-04-03T20:56:28.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/7a/617356cbecdb452812a5d42f720d6d5096b360d4a4c1073af700ea140ad2/regex-2026.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4c36a85b00fadb85db9d9e90144af0a980e1a3d2ef9cd0f8a5bef88054657c6", size = 489415, upload-time = "2026-04-03T20:53:11.645Z" }, + { url = "https://files.pythonhosted.org/packages/20/e6/bf057227144d02e3ba758b66649e87531d744dda5f3254f48660f18ae9d8/regex-2026.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5453ecf9cd58b562967badd1edbf092b0588a3af9e32ee3d05c985077ce87", size = 291205, upload-time = "2026-04-03T20:53:13.289Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3b/637181b787dd1a820ba1c712cee2b4144cd84a32dc776ca067b12b2d70c8/regex-2026.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6aa809ed4dc3706cc38594d67e641601bd2f36d5555b2780ff074edfcb136cf8", size = 289225, upload-time = "2026-04-03T20:53:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/05/21/bac05d806ed02cd4b39d9c8e5b5f9a2998c94c3a351b7792e80671fa5315/regex-2026.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33424f5188a7db12958246a54f59a435b6cb62c5cf9c8d71f7cc49475a5fdada", size = 792434, upload-time = "2026-04-03T20:53:17.414Z" }, + { url = "https://files.pythonhosted.org/packages/d9/17/c65d1d8ae90b772d5758eb4014e1e011bb2db353fc4455432e6cc9100df7/regex-2026.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d346fccdde28abba117cc9edc696b9518c3307fbfcb689e549d9b5979018c6d", size = 861730, upload-time = "2026-04-03T20:53:18.903Z" }, + { 
url = "https://files.pythonhosted.org/packages/ad/64/933321aa082a2c6ee2785f22776143ba89840189c20d3b6b1d12b6aae16b/regex-2026.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:415a994b536440f5011aa77e50a4274d15da3245e876e5c7f19da349caaedd87", size = 906495, upload-time = "2026-04-03T20:53:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/01/ea/4c8d306e9c36ac22417336b1e02e7b358152c34dc379673f2d331143725f/regex-2026.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21e5eb86179b4c67b5759d452ea7c48eb135cd93308e7a260aa489ed2eb423a4", size = 799810, upload-time = "2026-04-03T20:53:22.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/ce/7605048f00e1379eba89d610c7d644d8f695dc9b26d3b6ecfa3132b872ff/regex-2026.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:312ec9dd1ae7d96abd8c5a36a552b2139931914407d26fba723f9e53c8186f86", size = 774242, upload-time = "2026-04-03T20:53:25.015Z" }, + { url = "https://files.pythonhosted.org/packages/e9/77/283e0d5023fde22cd9e86190d6d9beb21590a452b195ffe00274de470691/regex-2026.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0d2b28aa1354c7cd7f71b7658c4326f7facac106edd7f40eda984424229fd59", size = 781257, upload-time = "2026-04-03T20:53:26.918Z" }, + { url = "https://files.pythonhosted.org/packages/8b/fb/7f3b772be101373c8626ed34c5d727dcbb8abd42a7b1219bc25fd9a3cc04/regex-2026.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:349d7310eddff40429a099c08d995c6d4a4bfaf3ff40bd3b5e5cb5a5a3c7d453", size = 854490, upload-time = "2026-04-03T20:53:29.065Z" }, + { url = "https://files.pythonhosted.org/packages/85/30/56547b80f34f4dd2986e1cdd63b1712932f63b6c4ce2f79c50a6cd79d1c2/regex-2026.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:e7ab63e9fe45a9ec3417509e18116b367e89c9ceb6219222a3396fa30b147f80", size = 763544, upload-time = "2026-04-03T20:53:30.917Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/2f/ce060fdfea8eff34a8997603532e44cdb7d1f35e3bc253612a8707a90538/regex-2026.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fe896e07a5a2462308297e515c0054e9ec2dd18dfdc9427b19900b37dfe6f40b", size = 844442, upload-time = "2026-04-03T20:53:32.463Z" }, + { url = "https://files.pythonhosted.org/packages/e5/44/810cb113096a1dacbe82789fbfab2823f79d19b7f1271acecb7009ba9b88/regex-2026.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eb59c65069498dbae3c0ef07bbe224e1eaa079825a437fb47a479f0af11f774f", size = 789162, upload-time = "2026-04-03T20:53:34.039Z" }, + { url = "https://files.pythonhosted.org/packages/20/96/9647dd7f2ecf6d9ce1fb04dfdb66910d094e10d8fe53e9c15096d8aa0bd2/regex-2026.4.4-cp311-cp311-win32.whl", hash = "sha256:2a5d273181b560ef8397c8825f2b9d57013de744da9e8257b8467e5da8599351", size = 266227, upload-time = "2026-04-03T20:53:35.601Z" }, + { url = "https://files.pythonhosted.org/packages/33/80/74e13262460530c3097ff343a17de9a34d040a5dc4de9cf3a8241faab51c/regex-2026.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:9542ccc1e689e752594309444081582f7be2fdb2df75acafea8a075108566735", size = 278399, upload-time = "2026-04-03T20:53:37.021Z" }, + { url = "https://files.pythonhosted.org/packages/1c/3c/39f19f47f19dcefa3403f09d13562ca1c0fd07ab54db2bc03148f3f6b46a/regex-2026.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:b5f9fb784824a042be3455b53d0b112655686fdb7a91f88f095f3fee1e2a2a54", size = 270473, upload-time = "2026-04-03T20:53:38.633Z" }, + { url = "https://files.pythonhosted.org/packages/e5/28/b972a4d3df61e1d7bcf1b59fdb3cddef22f88b6be43f161bb41ebc0e4081/regex-2026.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c07ab8794fa929e58d97a0e1796b8b76f70943fa39df225ac9964615cf1f9d52", size = 490434, upload-time = "2026-04-03T20:53:40.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/20/30041446cf6dc3e0eab344fc62770e84c23b6b68a3b657821f9f80cb69b4/regex-2026.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c785939dc023a1ce4ec09599c032cc9933d258a998d16ca6f2b596c010940eb", size = 292061, upload-time = "2026-04-03T20:53:41.862Z" }, + { url = "https://files.pythonhosted.org/packages/62/c8/3baa06d75c98c46d4cc4262b71fd2edb9062b5665e868bca57859dadf93a/regex-2026.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b1ce5c81c9114f1ce2f9288a51a8fd3aeea33a0cc440c415bf02da323aa0a76", size = 289628, upload-time = "2026-04-03T20:53:43.701Z" }, + { url = "https://files.pythonhosted.org/packages/31/87/3accf55634caad8c0acab23f5135ef7d4a21c39f28c55c816ae012931408/regex-2026.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:760ef21c17d8e6a4fe8cf406a97cf2806a4df93416ccc82fc98d25b1c20425be", size = 796651, upload-time = "2026-04-03T20:53:45.379Z" }, + { url = "https://files.pythonhosted.org/packages/f6/0c/aaa2c83f34efedbf06f61cb1942c25f6cf1ee3b200f832c4d05f28306c2e/regex-2026.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7088fcdcb604a4417c208e2169715800d28838fefd7455fbe40416231d1d47c1", size = 865916, upload-time = "2026-04-03T20:53:47.064Z" }, + { url = "https://files.pythonhosted.org/packages/d9/f6/8c6924c865124643e8f37823eca845dc27ac509b2ee58123685e71cd0279/regex-2026.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:07edca1ba687998968f7db5bc355288d0c6505caa7374f013d27356d93976d13", size = 912287, upload-time = "2026-04-03T20:53:49.422Z" }, + { url = "https://files.pythonhosted.org/packages/11/0e/a9f6f81013e0deaf559b25711623864970fe6a098314e374ccb1540a4152/regex-2026.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:993f657a7c1c6ec51b5e0ba97c9817d06b84ea5fa8d82e43b9405de0defdc2b9", size = 801126, 
upload-time = "2026-04-03T20:53:51.096Z" }, + { url = "https://files.pythonhosted.org/packages/71/61/3a0cc8af2dc0c8deb48e644dd2521f173f7e6513c6e195aad9aa8dd77ac5/regex-2026.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2b69102a743e7569ebee67e634a69c4cb7e59d6fa2e1aa7d3bdbf3f61435f62d", size = 776788, upload-time = "2026-04-03T20:53:52.889Z" }, + { url = "https://files.pythonhosted.org/packages/64/0b/8bb9cbf21ef7dee58e49b0fdb066a7aded146c823202e16494a36777594f/regex-2026.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dac006c8b6dda72d86ea3d1333d45147de79a3a3f26f10c1cf9287ca4ca0ac3", size = 785184, upload-time = "2026-04-03T20:53:55.627Z" }, + { url = "https://files.pythonhosted.org/packages/99/c2/d3e80e8137b25ee06c92627de4e4d98b94830e02b3e6f81f3d2e3f504cf5/regex-2026.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:50a766ee2010d504554bfb5f578ed2e066898aa26411d57e6296230627cdefa0", size = 859913, upload-time = "2026-04-03T20:53:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/9d5d876157d969c804622456ef250017ac7a8f83e0e14f903b9e6df5ce95/regex-2026.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9e2f5217648f68e3028c823df58663587c1507a5ba8419f4fdfc8a461be76043", size = 765732, upload-time = "2026-04-03T20:53:59.428Z" }, + { url = "https://files.pythonhosted.org/packages/82/80/b568935b4421388561c8ed42aff77247285d3ae3bb2a6ca22af63bae805e/regex-2026.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39d8de85a08e32632974151ba59c6e9140646dcc36c80423962b1c5c0a92e244", size = 852152, upload-time = "2026-04-03T20:54:01.505Z" }, + { url = "https://files.pythonhosted.org/packages/39/29/f0f81217e21cd998245da047405366385d5c6072048038a3d33b37a79dc0/regex-2026.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55d9304e0e7178dfb1e106c33edf834097ddf4a890e2f676f6c5118f84390f73", size = 789076, upload-time = "2026-04-03T20:54:03.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/1d/1d957a61976ab9d4e767dd4f9d04b66cc0c41c5e36cf40e2d43688b5ae6f/regex-2026.4.4-cp312-cp312-win32.whl", hash = "sha256:04bb679bc0bde8a7bfb71e991493d47314e7b98380b083df2447cda4b6edb60f", size = 266700, upload-time = "2026-04-03T20:54:05.639Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/bf575d396aeb58ea13b06ef2adf624f65b70fafef6950a80fc3da9cae3bc/regex-2026.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:db0ac18435a40a2543dbb3d21e161a6c78e33e8159bd2e009343d224bb03bb1b", size = 277768, upload-time = "2026-04-03T20:54:07.312Z" }, + { url = "https://files.pythonhosted.org/packages/c9/27/049df16ec6a6828ccd72add3c7f54b4df029669bea8e9817df6fff58be90/regex-2026.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:4ce255cc05c1947a12989c6db801c96461947adb7a59990f1360b5983fab4983", size = 270568, upload-time = "2026-04-03T20:54:09.484Z" }, + { url = "https://files.pythonhosted.org/packages/9d/83/c4373bc5f31f2cf4b66f9b7c31005bd87fe66f0dce17701f7db4ee79ee29/regex-2026.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:62f5519042c101762509b1d717b45a69c0139d60414b3c604b81328c01bd1943", size = 490273, upload-time = "2026-04-03T20:54:11.202Z" }, + { url = "https://files.pythonhosted.org/packages/46/f8/fe62afbcc3cf4ad4ac9adeaafd98aa747869ae12d3e8e2ac293d0593c435/regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3790ba9fb5dd76715a7afe34dbe603ba03f8820764b1dc929dd08106214ed031", size = 291954, upload-time = "2026-04-03T20:54:13.412Z" }, + { url = "https://files.pythonhosted.org/packages/5a/92/4712b9fe6a33d232eeb1c189484b80c6c4b8422b90e766e1195d6e758207/regex-2026.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fae3c6e795d7678963f2170152b0d892cf6aee9ee8afc8c45e6be38d5107fe7", size = 289487, upload-time = "2026-04-03T20:54:15.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/2c/f83b93f85e01168f1070f045a42d4c937b69fdb8dd7ae82d307253f7e36e/regex-2026.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:298c3ec2d53225b3bf91142eb9691025bab610e0c0c51592dde149db679b3d17", size = 796646, upload-time = "2026-04-03T20:54:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/df/55/61a2e17bf0c4dc57e11caf8dd11771280d8aaa361785f9e3bc40d653f4a7/regex-2026.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e9638791082eaf5b3ac112c587518ee78e083a11c4b28012d8fe2a0f536dfb17", size = 865904, upload-time = "2026-04-03T20:54:20.019Z" }, + { url = "https://files.pythonhosted.org/packages/45/32/1ac8ed1b5a346b5993a3d256abe0a0f03b0b73c8cc88d928537368ac65b6/regex-2026.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae3e764bd4c5ff55035dc82a8d49acceb42a5298edf6eb2fc4d328ee5dd7afae", size = 912304, upload-time = "2026-04-03T20:54:22.403Z" }, + { url = "https://files.pythonhosted.org/packages/26/47/2ee5c613ab546f0eddebf9905d23e07beb933416b1246c2d8791d01979b4/regex-2026.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffa81f81b80047ba89a3c69ae6a0f78d06f4a42ce5126b0eb2a0a10ad44e0b2e", size = 801126, upload-time = "2026-04-03T20:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/75/cd/41dacd129ca9fd20bd7d02f83e0fad83e034ac8a084ec369c90f55ef37e2/regex-2026.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f56ebf9d70305307a707911b88469213630aba821e77de7d603f9d2f0730687d", size = 776772, upload-time = "2026-04-03T20:54:26.319Z" }, + { url = "https://files.pythonhosted.org/packages/89/6d/5af0b588174cb5f46041fa7dd64d3fd5cd2fe51f18766703d1edc387f324/regex-2026.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:773d1dfd652bbffb09336abf890bfd64785c7463716bf766d0eb3bc19c8b7f27", size = 785228, upload-time = "2026-04-03T20:54:28.387Z" }, + { url = "https://files.pythonhosted.org/packages/b7/3b/f5a72b7045bd59575fc33bf1345f156fcfd5a8484aea6ad84b12c5a82114/regex-2026.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d51d20befd5275d092cdffba57ded05f3c436317ee56466c8928ac32d960edaf", size = 860032, upload-time = "2026-04-03T20:54:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/39/a4/72a317003d6fcd7a573584a85f59f525dfe8f67e355ca74eb6b53d66a5e2/regex-2026.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0a51cdb3c1e9161154f976cb2bef9894bc063ac82f31b733087ffb8e880137d0", size = 765714, upload-time = "2026-04-03T20:54:32.789Z" }, + { url = "https://files.pythonhosted.org/packages/25/1e/5672e16f34dbbcb2560cc7e6a2fbb26dfa8b270711e730101da4423d3973/regex-2026.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae5266a82596114e41fb5302140e9630204c1b5f325c770bec654b95dd54b0aa", size = 852078, upload-time = "2026-04-03T20:54:34.546Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0d/c813f0af7c6cc7ed7b9558bac2e5120b60ad0fa48f813e4d4bd55446f214/regex-2026.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c882cd92ec68585e9c1cf36c447ec846c0d94edd706fe59e0c198e65822fd23b", size = 789181, upload-time = "2026-04-03T20:54:36.642Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6d/a344608d1adbd2a95090ddd906cec09a11be0e6517e878d02a5123e0917f/regex-2026.4.4-cp313-cp313-win32.whl", hash = "sha256:05568c4fbf3cb4fa9e28e3af198c40d3237cf6041608a9022285fe567ec3ad62", size = 266690, upload-time = "2026-04-03T20:54:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/54049f89b46235ca6f45cd6c88668a7050e77d4a15555e47dd40fde75263/regex-2026.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:3384df51ed52db0bea967e21458ab0a414f67cdddfd94401688274e55147bb81", size = 277733, upload-time = "2026-04-03T20:54:40.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/21/61366a8e20f4d43fb597708cac7f0e2baadb491ecc9549b4980b2be27d16/regex-2026.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:acd38177bd2c8e69a411d6521760806042e244d0ef94e2dd03ecdaa8a3c99427", size = 270565, upload-time = "2026-04-03T20:54:41.883Z" }, + { url = "https://files.pythonhosted.org/packages/f1/1e/3a2b9672433bef02f5d39aa1143ca2c08f311c1d041c464a42be9ae648dc/regex-2026.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f94a11a9d05afcfcfa640e096319720a19cc0c9f7768e1a61fceee6a3afc6c7c", size = 494126, upload-time = "2026-04-03T20:54:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4b/c132a4f4fe18ad3340d89fcb56235132b69559136036b845be3c073142ed/regex-2026.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:36bcb9d6d1307ab629edc553775baada2aefa5c50ccc0215fbfd2afcfff43141", size = 293882, upload-time = "2026-04-03T20:54:45.41Z" }, + { url = "https://files.pythonhosted.org/packages/f4/5f/eaa38092ce7a023656280f2341dbbd4ad5f05d780a70abba7bb4f4bea54c/regex-2026.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261c015b3e2ed0919157046d768774ecde57f03d8fa4ba78d29793447f70e717", size = 292334, upload-time = "2026-04-03T20:54:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f6/dd38146af1392dac33db7074ab331cec23cced3759167735c42c5460a243/regex-2026.4.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c228cf65b4a54583763645dcd73819b3b381ca8b4bb1b349dee1c135f4112c07", size = 811691, upload-time = "2026-04-03T20:54:49.074Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f0/dc54c2e69f5eeec50601054998ec3690d5344277e782bd717e49867c1d29/regex-2026.4.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dd2630faeb6876fb0c287f664d93ddce4d50cd46c6e88e60378c05c9047e08ca", size = 871227, upload-time = "2026-04-03T20:54:51.035Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/af/cb16bd5dc61621e27df919a4449bbb7e5a1034c34d307e0a706e9cc0f3e3/regex-2026.4.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6a50ab11b7779b849472337191f3a043e27e17f71555f98d0092fa6d73364520", size = 917435, upload-time = "2026-04-03T20:54:52.994Z" }, + { url = "https://files.pythonhosted.org/packages/5c/71/8b260897f22996b666edd9402861668f45a2ca259f665ac029e6104a2d7d/regex-2026.4.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0734f63afe785138549fbe822a8cfeaccd1bae814c5057cc0ed5b9f2de4fc883", size = 816358, upload-time = "2026-04-03T20:54:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/1c/60/775f7f72a510ef238254906c2f3d737fc80b16ca85f07d20e318d2eea894/regex-2026.4.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4ee50606cb1967db7e523224e05f32089101945f859928e65657a2cbb3d278b", size = 785549, upload-time = "2026-04-03T20:54:57.01Z" }, + { url = "https://files.pythonhosted.org/packages/58/42/34d289b3627c03cf381e44da534a0021664188fa49ba41513da0b4ec6776/regex-2026.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6c1818f37be3ca02dcb76d63f2c7aaba4b0dc171b579796c6fbe00148dfec6b1", size = 801364, upload-time = "2026-04-03T20:54:58.981Z" }, + { url = "https://files.pythonhosted.org/packages/fc/20/f6ecf319b382a8f1ab529e898b222c3f30600fcede7834733c26279e7465/regex-2026.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f5bfc2741d150d0be3e4a0401a5c22b06e60acb9aa4daa46d9e79a6dcd0f135b", size = 866221, upload-time = "2026-04-03T20:55:00.88Z" }, + { url = "https://files.pythonhosted.org/packages/92/6a/9f16d3609d549bd96d7a0b2aee1625d7512ba6a03efc01652149ef88e74d/regex-2026.4.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:504ffa8a03609a087cad81277a629b6ce884b51a24bd388a7980ad61748618ff", size = 772530, upload-time = "2026-04-03T20:55:03.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/f6/aa9768bc96a4c361ac96419fbaf2dcdc33970bb813df3ba9b09d5d7b6d96/regex-2026.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:70aadc6ff12e4b444586e57fc30771f86253f9f0045b29016b9605b4be5f7dfb", size = 856989, upload-time = "2026-04-03T20:55:05.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/b4/c671db3556be2473ae3e4bb7a297c518d281452871501221251ea4ecba57/regex-2026.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f4f83781191007b6ef43b03debc35435f10cad9b96e16d147efe84a1d48bdde4", size = 803241, upload-time = "2026-04-03T20:55:07.162Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5c/83e3b1d89fa4f6e5a1bc97b4abd4a9a97b3c1ac7854164f694f5f0ba98a0/regex-2026.4.4-cp313-cp313t-win32.whl", hash = "sha256:e014a797de43d1847df957c0a2a8e861d1c17547ee08467d1db2c370b7568baa", size = 269921, upload-time = "2026-04-03T20:55:09.62Z" }, + { url = "https://files.pythonhosted.org/packages/28/07/077c387121f42cdb4d92b1301133c0d93b5709d096d1669ab847dda9fe2e/regex-2026.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:b15b88b0d52b179712632832c1d6e58e5774f93717849a41096880442da41ab0", size = 281240, upload-time = "2026-04-03T20:55:11.521Z" }, + { url = "https://files.pythonhosted.org/packages/9d/22/ead4a4abc7c59a4d882662aa292ca02c8b617f30b6e163bc1728879e9353/regex-2026.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:586b89cdadf7d67bf86ae3342a4dcd2b8d70a832d90c18a0ae955105caf34dbe", size = 272440, upload-time = "2026-04-03T20:55:13.365Z" }, + { url = "https://files.pythonhosted.org/packages/f0/f5/ed97c2dc47b5fbd4b73c0d7d75f9ebc8eca139f2bbef476bba35f28c0a77/regex-2026.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2da82d643fa698e5e5210e54af90181603d5853cf469f5eedf9bfc8f59b4b8c7", size = 490343, upload-time = "2026-04-03T20:55:15.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/e9/de4828a7385ec166d673a5790ad06ac48cdaa98bc0960108dd4b9cc1aef7/regex-2026.4.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:54a1189ad9d9357760557c91103d5e421f0a2dabe68a5cdf9103d0dcf4e00752", size = 291909, upload-time = "2026-04-03T20:55:17.558Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d6/5cfbfc97f3201a4d24b596a77957e092030dcc4205894bc035cedcfce62f/regex-2026.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:76d67d5afb1fe402d10a6403bae668d000441e2ab115191a804287d53b772951", size = 289692, upload-time = "2026-04-03T20:55:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/f2212d9fd56fe897e36d0110ba30ba2d247bd6410c5bd98499c7e5a1e1f2/regex-2026.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7cd3e4ee8d80447a83bbc9ab0c8459781fa77087f856c3e740d7763be0df27f", size = 796979, upload-time = "2026-04-03T20:55:22.56Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e3/a016c12675fbac988a60c7e1c16e67823ff0bc016beb27bd7a001dbdabc6/regex-2026.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e19e18c568d2866d8b6a6dfad823db86193503f90823a8f66689315ba28fbe8", size = 866744, upload-time = "2026-04-03T20:55:24.646Z" }, + { url = "https://files.pythonhosted.org/packages/af/a4/0b90ca4cf17adc3cb43de80ec71018c37c88ad64987e8d0d481a95ca60b5/regex-2026.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7698a6f38730fd1385d390d1ed07bb13dce39aa616aca6a6d89bea178464b9a4", size = 911613, upload-time = "2026-04-03T20:55:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3b/2b3dac0b82d41ab43aa87c6ecde63d71189d03fe8854b8ca455a315edac3/regex-2026.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:173a66f3651cdb761018078e2d9487f4cf971232c990035ec0eb1cdc6bf929a9", size = 800551, 
upload-time = "2026-04-03T20:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/25/fe/5365eb7aa0e753c4b5957815c321519ecab033c279c60e1b1ae2367fa810/regex-2026.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa7922bbb2cc84fa062d37723f199d4c0cd200245ce269c05db82d904db66b83", size = 776911, upload-time = "2026-04-03T20:55:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b3/7fb0072156bba065e3b778a7bc7b0a6328212be5dd6a86fd207e0c4f2dab/regex-2026.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:59f67cd0a0acaf0e564c20bbd7f767286f23e91e2572c5703bf3e56ea7557edb", size = 785751, upload-time = "2026-04-03T20:55:33.797Z" }, + { url = "https://files.pythonhosted.org/packages/02/1a/9f83677eb699273e56e858f7bd95acdbee376d42f59e8bfca2fd80d79df3/regex-2026.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:475e50f3f73f73614f7cba5524d6de49dee269df00272a1b85e3d19f6d498465", size = 860484, upload-time = "2026-04-03T20:55:35.745Z" }, + { url = "https://files.pythonhosted.org/packages/3b/7a/93937507b61cfcff8b4c5857f1b452852b09f741daa9acae15c971d8554e/regex-2026.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:a1c0c7d67b64d85ac2e1879923bad2f08a08f3004055f2f406ef73c850114bd4", size = 765939, upload-time = "2026-04-03T20:55:37.972Z" }, + { url = "https://files.pythonhosted.org/packages/86/ea/81a7f968a351c6552b1670ead861e2a385be730ee28402233020c67f9e0f/regex-2026.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:1371c2ccbb744d66ee63631cc9ca12aa233d5749972626b68fe1a649dd98e566", size = 851417, upload-time = "2026-04-03T20:55:39.92Z" }, + { url = "https://files.pythonhosted.org/packages/4c/7e/323c18ce4b5b8f44517a36342961a0306e931e499febbd876bb149d900f0/regex-2026.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:59968142787042db793348a3f5b918cf24ced1f23247328530e063f89c128a95", size = 789056, upload-time = "2026-04-03T20:55:42.303Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/af/e7510f9b11b1913b0cd44eddb784b2d650b2af6515bfce4cffcc5bfd1d38/regex-2026.4.4-cp314-cp314-win32.whl", hash = "sha256:59efe72d37fd5a91e373e5146f187f921f365f4abc1249a5ab446a60f30dd5f8", size = 272130, upload-time = "2026-04-03T20:55:44.995Z" }, + { url = "https://files.pythonhosted.org/packages/9a/51/57dae534c915e2d3a21490e88836fa2ae79dde3b66255ecc0c0a155d2c10/regex-2026.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:e0aab3ff447845049d676827d2ff714aab4f73f340e155b7de7458cf53baa5a4", size = 280992, upload-time = "2026-04-03T20:55:47.316Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5e/abaf9f4c3792e34edb1434f06717fae2b07888d85cb5cec29f9204931bf8/regex-2026.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:a7a5bb6aa0cf62208bb4fa079b0c756734f8ad0e333b425732e8609bd51ee22f", size = 273563, upload-time = "2026-04-03T20:55:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/ff/06/35da85f9f217b9538b99cbb170738993bcc3b23784322decb77619f11502/regex-2026.4.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:97850d0638391bdc7d35dc1c1039974dcb921eaafa8cc935ae4d7f272b1d60b3", size = 494191, upload-time = "2026-04-03T20:55:51.258Z" }, + { url = "https://files.pythonhosted.org/packages/54/5b/1bc35f479eef8285c4baf88d8c002023efdeebb7b44a8735b36195486ae7/regex-2026.4.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ee7337f88f2a580679f7bbfe69dc86c043954f9f9c541012f49abc554a962f2e", size = 293877, upload-time = "2026-04-03T20:55:53.214Z" }, + { url = "https://files.pythonhosted.org/packages/39/5b/f53b9ad17480b3ddd14c90da04bfb55ac6894b129e5dea87bcaf7d00e336/regex-2026.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7429f4e6192c11d659900c0648ba8776243bf396ab95558b8c51a345afeddde6", size = 292410, upload-time = "2026-04-03T20:55:55.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/56/52377f59f60a7c51aa4161eecf0b6032c20b461805aca051250da435ffc9/regex-2026.4.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4f10fbd5dd13dcf4265b4cc07d69ca70280742870c97ae10093e3d66000359", size = 811831, upload-time = "2026-04-03T20:55:57.802Z" }, + { url = "https://files.pythonhosted.org/packages/dd/63/8026310bf066f702a9c361f83a8c9658f3fe4edb349f9c1e5d5273b7c40c/regex-2026.4.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a152560af4f9742b96f3827090f866eeec5becd4765c8e0d3473d9d280e76a5a", size = 871199, upload-time = "2026-04-03T20:56:00.333Z" }, + { url = "https://files.pythonhosted.org/packages/20/9f/a514bbb00a466dbb506d43f187a04047f7be1505f10a9a15615ead5080ee/regex-2026.4.4-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54170b3e95339f415d54651f97df3bff7434a663912f9358237941bbf9143f55", size = 917649, upload-time = "2026-04-03T20:56:02.445Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6b/8399f68dd41a2030218839b9b18360d79b86d22b9fab5ef477c7f23ca67c/regex-2026.4.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:07f190d65f5a72dcb9cf7106bfc3d21e7a49dd2879eda2207b683f32165e4d99", size = 816388, upload-time = "2026-04-03T20:56:04.595Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9c/103963f47c24339a483b05edd568594c2be486188f688c0170fd504b2948/regex-2026.4.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9a2741ce5a29d3c84b0b94261ba630ab459a1b847a0d6beca7d62d188175c790", size = 785746, upload-time = "2026-04-03T20:56:07.13Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ee/7f6054c0dec0cee3463c304405e4ff42e27cff05bf36fcb34be549ab17bd/regex-2026.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:b26c30df3a28fd9793113dac7385a4deb7294a06c0f760dd2b008bd49a9139bc", size = 801483, upload-time = "2026-04-03T20:56:09.365Z" }, + { url = "https://files.pythonhosted.org/packages/30/c2/51d3d941cf6070dc00c3338ecf138615fc3cce0421c3df6abe97a08af61a/regex-2026.4.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:421439d1bee44b19f4583ccf42670ca464ffb90e9fdc38d37f39d1ddd1e44f1f", size = 866331, upload-time = "2026-04-03T20:56:12.039Z" }, + { url = "https://files.pythonhosted.org/packages/16/e8/76d50dcc122ac33927d939f350eebcfe3dbcbda96913e03433fc36de5e63/regex-2026.4.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b40379b53ecbc747fd9bdf4a0ea14eb8188ca1bd0f54f78893a39024b28f4863", size = 772673, upload-time = "2026-04-03T20:56:14.558Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6e/5f6bf75e20ea6873d05ba4ec78378c375cbe08cdec571c83fbb01606e563/regex-2026.4.4-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:08c55c13d2eef54f73eeadc33146fb0baaa49e7335eb1aff6ae1324bf0ddbe4a", size = 857146, upload-time = "2026-04-03T20:56:16.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/33/3c76d9962949e487ebba353a18e89399f292287204ac8f2f4cfc3a51c233/regex-2026.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9776b85f510062f5a75ef112afe5f494ef1635607bf1cc220c1391e9ac2f5e81", size = 803463, upload-time = "2026-04-03T20:56:18.923Z" }, + { url = "https://files.pythonhosted.org/packages/19/eb/ef32dcd2cb69b69bc0c3e55205bce94a7def48d495358946bc42186dcccc/regex-2026.4.4-cp314-cp314t-win32.whl", hash = "sha256:385edaebde5db5be103577afc8699fea73a0e36a734ba24870be7ffa61119d74", size = 275709, upload-time = "2026-04-03T20:56:20.996Z" }, + { url = "https://files.pythonhosted.org/packages/a0/86/c291bf740945acbf35ed7dbebf8e2eea2f3f78041f6bd7cdab80cb274dc0/regex-2026.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:5d354b18839328927832e2fa5f7c95b7a3ccc39e7a681529e1685898e6436d45", size = 285622, upload-time = "2026-04-03T20:56:23.641Z" }, + { 
url = "https://files.pythonhosted.org/packages/d5/e7/ec846d560ae6a597115153c02ca6138a7877a1748b2072d9521c10a93e58/regex-2026.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:af0384cb01a33600c49505c27c6c57ab0b27bf84a74e28524c92ca897ebdac9d", size = 275773, upload-time = "2026-04-03T20:56:26.07Z" }, +] + +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + +[[package]] +name = "setuptools" +version = "81.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/1c/73e719955c59b8e424d015ab450f51c0af856ae46ea2da83eba51cc88de1/setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a", size = 1198299, upload-time = "2026-02-06T21:10:39.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = 
"2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = 
"2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, +] + +[[package]] +name = "torch" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, + { name = "cuda-toolkit", extra = ["cublas", "cudart", "cufft", "cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" }, + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" }, + { name = "setuptools" }, + { name = "sympy" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/0d/98b410492609e34a155fa8b121b55c7dca229f39636851c3a9ec20edea21/torch-2.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7b6a60d48062809f58595509c524b88e6ddec3ebe25833d6462eeab81e5f2ce4", size = 80529712, upload-time = "2026-03-23T18:12:02.608Z" }, + { url = "https://files.pythonhosted.org/packages/84/03/acea680005f098f79fd70c1d9d5ccc0cb4296ec2af539a0450108232fc0c/torch-2.11.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d91aac77f24082809d2c5a93f52a5f085032740a1ebc9252a7b052ef5a4fddc6", size = 419718178, upload-time = "2026-03-23T18:10:46.675Z" }, + { url = "https://files.pythonhosted.org/packages/8c/8b/d7be22fbec9ffee6cff31a39f8750d4b3a65d349a286cf4aec74c2375662/torch-2.11.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7aa2f9bbc6d4595ba72138026b2074be1233186150e9292865e04b7a63b8c67a", size = 530604548, upload-time = "2026-03-23T18:10:03.569Z" }, + { url = "https://files.pythonhosted.org/packages/d1/bd/9912d30b68845256aabbb4a40aeefeef3c3b20db5211ccda653544ada4b6/torch-2.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:73e24aaf8f36ab90d95cd1761208b2eb70841c2a9ca1a3f9061b39fc5331b708", size = 
114519675, upload-time = "2026-03-23T18:11:52.995Z" }, + { url = "https://files.pythonhosted.org/packages/6f/8b/69e3008d78e5cee2b30183340cc425081b78afc5eff3d080daab0adda9aa/torch-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b5866312ee6e52ea625cd211dcb97d6a2cdc1131a5f15cc0d87eec948f6dd34", size = 80606338, upload-time = "2026-03-23T18:11:34.781Z" }, + { url = "https://files.pythonhosted.org/packages/13/16/42e5915ebe4868caa6bac83a8ed59db57f12e9a61b7d749d584776ed53d5/torch-2.11.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f99924682ef0aa6a4ab3b1b76f40dc6e273fca09f367d15a524266db100a723f", size = 419731115, upload-time = "2026-03-23T18:11:06.944Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c9/82638ef24d7877510f83baf821f5619a61b45568ce21c0a87a91576510aa/torch-2.11.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0f68f4ac6d95d12e896c3b7a912b5871619542ec54d3649cf48cc1edd4dd2756", size = 530712279, upload-time = "2026-03-23T18:10:31.481Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ff/6756f1c7ee302f6d202120e0f4f05b432b839908f9071157302cedfc5232/torch-2.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbf39280699d1b869f55eac536deceaa1b60bd6788ba74f399cc67e60a5fab10", size = 114556047, upload-time = "2026-03-23T18:10:55.931Z" }, + { url = "https://files.pythonhosted.org/packages/87/89/5ea6722763acee56b045435fb84258db7375c48165ec8be7880ab2b281c5/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e6debd97ccd3205bbb37eb806a9d8219e1139d15419982c09e23ef7d4369d18", size = 80606801, upload-time = "2026-03-23T18:10:18.649Z" }, + { url = "https://files.pythonhosted.org/packages/32/d1/8ed2173589cbfe744ed54e5a73efc107c0085ba5777ee93a5f4c1ab90553/torch-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:63a68fa59de8f87acc7e85a5478bb2dddbb3392b7593ec3e78827c793c4b73fd", size = 419732382, upload-time = "2026-03-23T18:08:30.835Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/e1/b73f7c575a4b8f87a5928f50a1e35416b5e27295d8be9397d5293e7e8d4c/torch-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:cc89b9b173d9adfab59fd227f0ab5e5516d9a52b658ae41d64e59d2e55a418db", size = 530711509, upload-time = "2026-03-23T18:08:47.213Z" }, + { url = "https://files.pythonhosted.org/packages/66/82/3e3fcdd388fbe54e29fd3f991f36846ff4ac90b0d0181e9c8f7236565f82/torch-2.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:4dda3b3f52d121063a731ddb835f010dc137b920d7fec2778e52f60d8e4bf0cd", size = 114555842, upload-time = "2026-03-23T18:09:52.111Z" }, + { url = "https://files.pythonhosted.org/packages/db/38/8ac78069621b8c2b4979c2f96dc8409ef5e9c4189f6aac629189a78677ca/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8b394322f49af4362d4f80e424bcaca7efcd049619af03a4cf4501520bdf0fb4", size = 80959574, upload-time = "2026-03-23T18:10:14.214Z" }, + { url = "https://files.pythonhosted.org/packages/6d/6c/56bfb37073e7136e6dd86bfc6af7339946dd684e0ecf2155ac0eee687ae1/torch-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2658f34ce7e2dabf4ec73b45e2ca68aedad7a5be87ea756ad656eaf32bf1e1ea", size = 419732324, upload-time = "2026-03-23T18:09:36.604Z" }, + { url = "https://files.pythonhosted.org/packages/07/f4/1b666b6d61d3394cca306ea543ed03a64aad0a201b6cd159f1d41010aeb1/torch-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:98bb213c3084cfe176302949bdc360074b18a9da7ab59ef2edc9d9f742504778", size = 530596026, upload-time = "2026-03-23T18:09:20.842Z" }, + { url = "https://files.pythonhosted.org/packages/48/6b/30d1459fa7e4b67e9e3fe1685ca1d8bb4ce7c62ef436c3a615963c6c866c/torch-2.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a97b94bbf62992949b4730c6cd2cc9aee7b335921ee8dc207d930f2ed09ae2db", size = 114793702, upload-time = "2026-03-23T18:09:47.304Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/0d/8603382f61abd0db35841148ddc1ffd607bf3100b11c6e1dab6d2fc44e72/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01018087326984a33b64e04c8cb5c2795f9120e0d775ada1f6638840227b04d7", size = 80573442, upload-time = "2026-03-23T18:09:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/c7/86/7cd7c66cb9cec6be330fff36db5bd0eef386d80c031b581ec81be1d4b26c/torch-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:2bb3cc54bd0dea126b0060bb1ec9de0f9c7f7342d93d436646516b0330cd5be7", size = 419749385, upload-time = "2026-03-23T18:07:33.77Z" }, + { url = "https://files.pythonhosted.org/packages/47/e8/b98ca2d39b2e0e4730c0ee52537e488e7008025bc77ca89552ff91021f7c/torch-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4dc8b3809469b6c30b411bb8c4cad3828efd26236153d9beb6a3ec500f211a60", size = 530716756, upload-time = "2026-03-23T18:07:50.02Z" }, + { url = "https://files.pythonhosted.org/packages/78/88/d4a4cda8362f8a30d1ed428564878c3cafb0d87971fbd3947d4c84552095/torch-2.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b4e811728bd0cc58fb2b0948fe939a1ee2bf1422f6025be2fca4c7bd9d79718", size = 114552300, upload-time = "2026-03-23T18:09:05.617Z" }, + { url = "https://files.pythonhosted.org/packages/bf/46/4419098ed6d801750f26567b478fc185c3432e11e2cad712bc6b4c2ab0d0/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8245477871c3700d4370352ffec94b103cfcb737229445cf9946cddb7b2ca7cd", size = 80959460, upload-time = "2026-03-23T18:09:00.818Z" }, + { url = "https://files.pythonhosted.org/packages/fd/66/54a56a4a6ceaffb567231994a9745821d3af922a854ed33b0b3a278e0a99/torch-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:ab9a8482f475f9ba20e12db84b0e55e2f58784bdca43a854a6ccd3fd4b9f75e6", size = 419735835, upload-time = "2026-03-23T18:07:18.974Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/e7/0b6665f533aa9e337662dc190425abc0af1fe3234088f4454c52393ded61/torch-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:563ed3d25542d7e7bbc5b235ccfacfeb97fb470c7fee257eae599adb8005c8a2", size = 530613405, upload-time = "2026-03-23T18:08:07.014Z" }, + { url = "https://files.pythonhosted.org/packages/cf/bf/c8d12a2c86dbfd7f40fb2f56fbf5a505ccf2d9ce131eb559dfc7c51e1a04/torch-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b2a43985ff5ef6ddd923bbcf99943e5f58059805787c5c9a2622bf05ca2965b0", size = 114792991, upload-time = "2026-03-23T18:08:19.216Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "transformers" +version = "5.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, 
upload-time = "2026-04-13T16:55:55.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" }, +] + +[[package]] +name = "triton" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/2c/96f92f3c60387e14cc45aed49487f3486f89ea27106c1b1376913c62abe4/triton-3.6.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49df5ef37379c0c2b5c0012286f80174fcf0e073e5ade1ca9a86c36814553651", size = 176081190, upload-time = "2026-01-20T16:16:00.523Z" }, + { url = "https://files.pythonhosted.org/packages/e0/12/b05ba554d2c623bffa59922b94b0775673de251f468a9609bc9e45de95e9/triton-3.6.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e323d608e3a9bfcc2d9efcc90ceefb764a82b99dea12a86d643c72539ad5d3", size = 188214640, upload-time = "2026-01-20T16:00:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" }, + { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, + { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = 
"2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, + { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] diff --git a/vitest.config.ts b/vitest.config.ts index 70df29d..fe46f1f 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -65,6 +65,12 @@ export default defineConfig({ functions: 80, lines: 80, }, + "src/embeddings/text.ts": { + statements: 80, + branches: 80, + functions: 80, + lines: 80, + }, }, }, }, From 13e8b28944fe1604e749ddd8b5f6f129f9a1ef52 Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 22:43:07 -0700 Subject: [PATCH 5/7] Add semantic retrieval and transcript-backed facts --- .gitignore | 7 + claude-code/bundle/capture.js | 15 +- claude-code/bundle/pre-tool-use.js | 614 ++++++++++++++++-- claude-code/bundle/session-end.js | 15 +- claude-code/bundle/session-start.js | 61 +- claude-code/bundle/shell/deeplake-shell.js | 249 ++++++- claude-code/bundle/wiki-worker.js | 54 +- .../tests/bash-command-compiler.test.ts | 82 ++- claude-code/tests/grep-core.test.ts | 63 ++ claude-code/tests/hooks-source.test.ts | 27 + claude-code/tests/memory-facts.test.ts | 27 +- codex/bundle/capture.js | 15 +- codex/bundle/pre-tool-use.js | 614 ++++++++++++++++-- codex/bundle/session-start.js | 55 +- codex/bundle/shell/deeplake-shell.js | 249 ++++++- codex/bundle/stop.js | 15 +- codex/bundle/wiki-worker.js | 54 +- esbuild.config.mjs | 4 +- ...ackfill_harrier_embeddings.cpython-312.pyc | Bin 60971 -> 0 bytes ...ackfill_harrier_embeddings.cpython-314.pyc | Bin 72656 -> 0 bytes src/hooks/bash-command-compiler.ts | 560 ++++++++++++++-- src/hooks/codex/session-start.ts | 55 +- src/hooks/codex/wiki-worker.ts | 14 +- src/hooks/memory-facts.ts | 57 +- src/hooks/session-start.ts | 61 +- src/hooks/wiki-worker.ts | 14 +- src/shell/grep-core.ts | 156 ++++- 
src/tools/backfill-locomo-facts.ts | 52 +- src/utils/retrieval-mode.ts | 13 + 29 files changed, 2890 insertions(+), 312 deletions(-) delete mode 100644 scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc delete mode 100644 scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc diff --git a/.gitignore b/.gitignore index 930a1ba..be1242e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,10 @@ bench/ CLAUDE.md RESULTS-fast-path-all-commands.md PLAN-fast-path-all-commands.md +__pycache__/ +*.pyc +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.coverage +htmlcov/ diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index a62bd70..52a4555 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -844,25 +844,28 @@ Rules: // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID4 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. 
Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. 
- Return no markdown, no prose, no code fences, only JSON.`; diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 3891171..a87a584 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -37,12 +37,12 @@ function loadConfig() { return null; } } - const env = process.env; - if (!env.HIVEMIND_TOKEN && env.DEEPLAKE_TOKEN) { + const env2 = process.env; + if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env.HIVEMIND_TOKEN ?? env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env.HIVEMIND_ORG_ID ?? env.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -50,16 +50,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env.HIVEMIND_WORKSPACE_ID ?? env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", - graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", - factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", - entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", - factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? 
"fact_entity_links", - memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -687,11 +687,144 @@ function isDirectRun(metaUrl) { } } +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 
0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? 
"Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? "cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? 
""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} function isIndexDisabled() { const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); @@ -704,9 +837,56 @@ function isPsqlMode() { const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? 
DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function escapeRegexLiteral(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -921,7 +1101,7 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; const ignoreCase = likeOp === "ILIKE"; @@ -933,7 +1113,11 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const fallbackSessFilter = likeSessFilter; const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); - const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { @@ -945,6 +1129,25 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const buildSemanticCombinedQuery = () => { + const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); + const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const rows2 = await api.query(buildSemanticCombinedQuery()); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); @@ -1039,6 +1242,7 @@ function buildGrepSearchOptions(params, targetPath) { const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { @@ -1049,6 +1253,7 @@ function buildGrepSearchOptions(params, targetPath) { regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, bm25QueryText: bm25QueryText ?? 
void 0, limit: DEFAULT_GREP_CANDIDATE_LIMIT }; @@ -1072,6 +1277,12 @@ function buildRegexFilter(column, pattern, ignoreCase) { function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; +} +function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +} function toSqlRegexPattern(pattern, ignoreCase) { if (!pattern) return null; @@ -1844,6 +2055,49 @@ function dedupeRowsByPath(rows) { } // dist/src/hooks/bash-command-compiler.js +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID2 = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT2 = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT2 = 0.3; +var summaryRetrievalEmbedder = null; +function envString2(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag2(...names) { + const raw = envString2(...names) ?? 
""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber2(fallback, ...names) { + const raw = envString2(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getSummaryRetrievalEmbedder() { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString2("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID2, + device: envString2("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString2("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString2("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return summaryRetrievalEmbedder; +} +function sqlFloat4Array2(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -2082,22 +2336,64 @@ function extractPsqlQueryFromCommand(cmd) { function normalizeSqlRef(ref) { return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); } -var INTERCEPTED_SQL_REFS = /* @__PURE__ */ new Set([ - "memory", - "sessions", - "graph_nodes", - "graph_edges", - "memory_facts", - "memory_entities", - "fact_entity_links", - "hivemind.memory", - "hivemind.sessions", - "hivemind.graph_nodes", - "hivemind.graph_edges", - "hivemind.memory_facts", - "hivemind.memory_entities", - 
"hivemind.fact_entity_links" -]); +function deriveSiblingTableName(tableName, expectedBase, targetBase) { + if (tableName === expectedBase) + return null; + if (!tableName.startsWith(expectedBase)) + return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} +function resolveInterceptedTableNames(memoryTable, sessionsTable) { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links") + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links") + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? "graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? 
"fact_entity_links" + }; +} +function getInterceptedSqlRefs() { + if (isFactsSessionsOnlyPsqlMode()) { + return /* @__PURE__ */ new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); + } + return /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); +} function extractSqlTableRefs(query) { const refs = []; const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; @@ -2108,11 +2404,13 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => interceptedRefs.has(ref)); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -2158,13 +2456,19 @@ function parsePsqlSegment(pipeline, tokens) { } return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; } -function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM 
"${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + sql = sql.replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN 
"${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); + } return sql; } -function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? 
"fact_entity_links") { +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); @@ -2173,14 +2477,16 @@ function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = throw new Error("psql mode only supports SELECT queries"); } const allowedTables = /* @__PURE__ */ new Set([ - memoryTable, sessionsTable, - graphNodesTable, - graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference an intercepted hivemind memory table"); @@ -2270,6 +2576,140 @@ async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms } return expanded; } +function splitDelimitedField(value) { + if (typeof value !== "string") 
+ return []; + return value.split(",").map((item) => item.trim()).filter(Boolean); +} +function extractSessionIdFromPath(value) { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} +function extractSummarySourcePath(summary) { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} +function addHybridCandidate(map, candidate) { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? ""; + if (!sessionId && !sourcePath) + return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: /* @__PURE__ */ new Set([candidate.signal]) + }); +} +async function fetchEntityResolution(api, entitiesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`).join(" OR "); + const sql = `SELECT entity_id, source_session_ids, source_paths, search_text, search_text <#> '${phrase}' AS score FROM "${entitiesTable}" WHERE ${where} ORDER BY score ASC LIMIT 8`; + const rows = await api.query(sql); + const entityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? 
row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) + entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? `/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity" + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} +async function fetchFactCandidates(api, factsTable, terms, entityIds) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms).map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 ? `(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? 
`(${topicClauses.join(" OR ")})` : "" + ].filter(Boolean); + if (whereParts.length === 0) + return { entityIds: [], candidates: [] }; + const sql = `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score FROM "${factsTable}" WHERE ${whereParts.join(" AND ")} ORDER BY score ASC LIMIT 16`; + const rows = await api.query(sql); + const relatedEntityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"]) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) + relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ? row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact" + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} +async function fetchSummaryCandidates(api, memoryTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + let sql; + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) + return []; + const queryVectorSql = sqlFloat4Array2(queryEmbedding); + sql = retrievalMode === "hybrid" ? 
`SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + } else { + const phraseSql = sqlStr(phrase); + sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score ASC LIMIT 8`; + } + const rows = await api.query(sql); + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = typeof row["path"] === "string" ? row["path"] : ""; + const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? 
`/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary" + }); + } + return [...candidateMap.values()]; +} function prependCtes(sql, ctes) { if (ctes.length === 0) return sql; @@ -2278,32 +2718,70 @@ function prependCtes(sql, ctes) { } return `WITH ${ctes.join(", ")} ${sql}`; } -function rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias) { +function rewriteQueryWithRestrictedTables(sql, aliases) { let rewritten = sql; - if (restrictedMemoryAlias) { - const memoryPattern = escapeRegex2(memoryTable); - rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(aliases.memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(aliases.sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex2(aliases.factsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); } - if 
(restrictedSessionsAlias) { - const sessionsPattern = escapeRegex2(sessionsTable); - rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex2(aliases.entitiesTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); + } + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex2(aliases.factEntityLinksTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"), `FROM "${aliases.restrictedLinksAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); } return rewritten; } -async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable) { +async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { return sql; } const refs = extractSqlTableRefs(sql); const touchesMemory2 = refs.some((ref) => ref === 
normalizeSqlRef(memoryTable)); const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); - if (!touchesMemory2 && !touchesSessions) + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory2 && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) return sql; const terms = extractSqlSearchTerms(sql); if (terms.length === 0) return sql; - const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); - if (candidates.length === 0 || candidates.length > 16) + const candidateMap = /* @__PURE__ */ new Map(); + const graphCandidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + const candidateEntityIds = [.../* @__PURE__ */ new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()].sort((a, b) => b.score - a.score || b.signals.size - a.signals.size).slice(0, 12); + if (candidates.length === 0) + return sql; 
+ if (candidates.length > 16) return sql; const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); const ctes = [ @@ -2311,6 +2789,12 @@ async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessions ]; let restrictedMemoryAlias = null; let restrictedSessionsAlias = null; + let restrictedFactsAlias = null; + let restrictedEntitiesAlias = null; + let restrictedLinksAlias = null; + if (candidateEntityIds.length > 0) { + ctes.push(`__hm_entity_candidates(entity_id) AS (VALUES ${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`); + } if (touchesMemory2) { restrictedMemoryAlias = "__hm_memory"; ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); @@ -2319,7 +2803,30 @@ async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessions restrictedSessionsAlias = "__hm_sessions"; ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); } - return prependCtes(rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), ctes); + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push(`"${restrictedFactsAlias}" AS ( SELECT * FROM "${factsTable}" f WHERE ( f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? 
` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates) OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + ` ))`); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push(`"${restrictedEntitiesAlias}" AS ( SELECT * FROM "${entitiesTable}" e WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates))`); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push(`"${restrictedLinksAlias}" AS ( SELECT * FROM "${factEntityLinksTable}" l WHERE ( l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + (touchesFacts ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` : "") + ` ))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias + }), ctes); } function formatPsqlValue(value) { if (value === null || value === void 0) @@ -2623,8 +3130,7 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; - const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges"; + const { graphNodesTable, graphEdgesTable } = resolveInterceptedTableNames(memoryTable, sessionsTable); const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); const rows = await api.query(prepared); diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index 23113b7..5a0ab1a 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -725,25 +725,28 @@ Rules: // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID4 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. 
- Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index ad55e71..a741c46 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -83,6 +83,10 @@ function isPsqlMode() { const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? 
process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/hooks/version-check.js import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; @@ -333,10 +337,64 @@ Answer rules: IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode. +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +var CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. +2. 
Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Ground every exact answer on sessions rows from those source sessions. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +9. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. + +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by name/topic: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, 
source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. + +IMPORTANT: Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode. + Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; function buildSessionStartAdditionalContext(args) { - const template = isPsqlMode() ? CLAUDE_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? 
CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX : CLAUDE_SESSION_START_CONTEXT; + const template = isPsqlMode() ? isFactsSessionsOnlyPsqlMode() ? CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY : CLAUDE_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CLAUDE_SESSION_START_CONTEXT_NO_INDEX : CLAUDE_SESSION_START_CONTEXT; const resolvedContext = template.replace(/HIVEMIND_AUTH_CMD/g, args.authCommand); let updateNotice = ""; if (args.currentVersion) { @@ -402,6 +460,7 @@ export { CLAUDE_SESSION_START_CONTEXT, CLAUDE_SESSION_START_CONTEXT_NO_INDEX, CLAUDE_SESSION_START_CONTEXT_PSQL, + CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY, CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY, buildSessionStartAdditionalContext, runSessionStartHook diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 819d244..2e88d4f 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66735,12 +66735,12 @@ function loadConfig() { return null; } } - const env2 = process.env; - if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { + const env3 = process.env; + if (!env3.HIVEMIND_TOKEN && env3.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env3.HIVEMIND_TOKEN ?? env3.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env3.HIVEMIND_ORG_ID ?? env3.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -66748,16 +66748,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? 
env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", - graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", - factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", - entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", - factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", - memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") + workspaceId: env3.HIVEMIND_WORKSPACE_ID ?? env3.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env3.HIVEMIND_API_URL ?? env3.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env3.HIVEMIND_TABLE ?? env3.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env3.HIVEMIND_SESSIONS_TABLE ?? env3.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env3.HIVEMIND_GRAPH_NODES_TABLE ?? env3.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env3.HIVEMIND_GRAPH_EDGES_TABLE ?? env3.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env3.HIVEMIND_FACTS_TABLE ?? env3.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env3.HIVEMIND_ENTITIES_TABLE ?? env3.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env3.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env3.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env3.HIVEMIND_MEMORY_PATH ?? env3.DEEPLAKE_MEMORY_PATH ?? 
join4(home, ".deeplake", "memory") }; } @@ -67375,11 +67375,144 @@ var DeeplakeApi = class { import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 
0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? 
"cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} function isIndexDisabled() { const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? 
""; return /^(1|true|yes|on)$/i.test(raw.trim()); @@ -67391,6 +67524,49 @@ function isSummaryBm25Disabled() { // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? 
"cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function escapeRegexLiteral(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -67605,7 +67781,7 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; const ignoreCase = likeOp === "ILIKE"; @@ -67617,7 +67793,11 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const fallbackSessFilter = likeSessFilter; const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); - const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { @@ -67629,6 +67809,25 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const buildSemanticCombinedQuery = () => { + const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); + const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const rows2 = await api.query(buildSemanticCombinedQuery()); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); @@ -67733,6 +67932,7 @@ function buildGrepSearchOptions(params, targetPath) { const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { @@ -67743,6 +67943,7 @@ function buildGrepSearchOptions(params, targetPath) { regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, bm25QueryText: bm25QueryText ?? 
void 0, limit: DEFAULT_GREP_CANDIDATE_LIMIT }; @@ -67766,6 +67967,12 @@ function buildRegexFilter(column, pattern, ignoreCase) { function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; +} +function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +} function toSqlRegexPattern(pattern, ignoreCase) { if (!pattern) return null; @@ -69355,8 +69562,8 @@ var YargsParser = class { if (typeof envPrefix === "undefined") return; const prefix = typeof envPrefix === "string" ? 
envPrefix : ""; - const env2 = mixin.env(); - Object.keys(env2).forEach(function(envVar) { + const env3 = mixin.env(); + Object.keys(env3).forEach(function(envVar) { if (prefix === "" || envVar.lastIndexOf(prefix, 0) === 0) { const keys = envVar.split("__").map(function(key, i11) { if (i11 === 0) { @@ -69365,7 +69572,7 @@ var YargsParser = class { return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { - setArg(keys.join("."), env2[envVar]); + setArg(keys.join("."), env3[envVar]); } } }); @@ -69676,12 +69883,12 @@ if (nodeVersion) { throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -var env = process ? process.env : {}; +var env2 = process ? process.env : {}; var require2 = createRequire ? createRequire(import.meta.url) : void 0; var parser = new YargsParser({ cwd: process.cwd, env: () => { - return env; + return env2; }, format, normalize, diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index 10580ce..1ace259 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -352,25 +352,28 @@ ${node.name}`); // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID3 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. 
+- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; function stripCodeFences2(text) { @@ -550,8 +553,32 @@ function parseMemoryFactExtraction(raw) { }) }; } +function buildMemoryFactTranscript(rows) { + const normalized = rows.map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? row.turnIndex : 0, + speaker: normalizeString2(row.speaker), + text: normalizeString2(row.text), + eventType: normalizeString2(row.eventType) || "message", + turnSummary: normalizeString2(row.turnSummary), + sourceDateTime: normalizeString2(row.sourceDateTime) || normalizeString2(row.creationDate) + })).filter((row) => row.text || row.turnSummary); + if (normalized.length === 0) + return "(no transcript rows)"; + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}` + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} function buildMemoryFactPrompt(args) { - return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); + return (args.template ?? 
MEMORY_FACT_PROMPT_TEMPLATE).replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); } async function replaceSessionFacts(params) { const ts = params.ts ?? (/* @__PURE__ */ new Date()).toISOString(); @@ -740,7 +767,7 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); + const rows = await query(`SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; @@ -838,8 +865,17 @@ async function main() { wlog(`graph update failed: ${e.message}`); } try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
row["creation_date"] : "" + }))); const factPrompt = buildMemoryFactPrompt({ - summaryText: text, + transcriptText, sessionId: cfg.sessionId, sourcePath: jsonlServerPath, project: cfg.project, diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index f879330..b656bc3 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -11,10 +11,13 @@ import { } from "../../src/hooks/bash-command-compiler.js"; const originalPsqlMode = process.env.HIVEMIND_PSQL_MODE; +const originalFactsSessionsOnlyPsqlMode = process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; function restorePsqlMode(): void { if (originalPsqlMode === undefined) delete process.env.HIVEMIND_PSQL_MODE; else process.env.HIVEMIND_PSQL_MODE = originalPsqlMode; + if (originalFactsSessionsOnlyPsqlMode === undefined) delete process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + else process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = originalFactsSessionsOnlyPsqlMode; } describe("bash-command-compiler parsing", () => { @@ -336,6 +339,32 @@ describe("bash-command-compiler parsing", () => { restorePsqlMode(); }); + it("parses only facts-and-sessions psql segments when the mode is enabled", () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, summary FROM memory LIMIT 2\"")).toBeNull(); + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT node_id, canonical_name FROM graph_nodes LIMIT 2\"")).toBeNull(); + + expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT path, creation_date, turn_index, speaker, text FROM sessions WHERE text ILIKE '%camp%' LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + 
expect(parseCompiledSegment("psql -At -F '|' -c \"SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2\"")).toEqual({ + kind: "psql", + query: "SELECT fact_id, subject_name, predicate, object_name FROM memory_facts LIMIT 2", + lineLimit: 0, + tuplesOnly: true, + fieldSeparator: "|", + }); + + restorePsqlMode(); + }); + it("rejects unsupported segments and command shapes", () => { process.env.HIVEMIND_PSQL_MODE = "1"; expect(parseCompiledSegment("cat")).toBeNull(); @@ -528,7 +557,13 @@ describe("bash-command-compiler execution", () => { it("executes psql queries against normalized memory and sessions table names", async () => { const query = vi.fn(async (sql: string) => { - if (sql.includes('FROM "graph_nodes"') || sql.includes('FROM "graph_edges"')) { + if ( + sql.includes('FROM "graph_nodes') || + sql.includes('FROM "graph_edges') || + sql.includes('FROM "memory_entities') || + sql.includes('FROM "memory_facts') || + (sql.includes('FROM "memory_actual"') && !sql.includes('JOIN "sessions_actual"')) + ) { return []; } expect(sql).toContain('FROM "memory_actual"'); @@ -552,7 +587,13 @@ describe("bash-command-compiler execution", () => { it("executes direct sessions queries against physical per-message rows", async () => { const query = vi.fn(async (sql: string) => { - if (sql.includes('FROM "graph_nodes"') || sql.includes('FROM "graph_edges"')) { + if ( + sql.includes('FROM "graph_nodes') || + sql.includes('FROM "graph_edges') || + sql.includes('FROM "memory_entities') || + sql.includes('FROM "memory_facts') || + sql.includes('FROM "memory_actual"') + ) { return []; } expect(sql).toContain('FROM "sessions_actual"'); @@ -616,6 +657,43 @@ describe("bash-command-compiler execution", () => { "psql -At -F '|' -c \"SELECT * FROM users\"", ); expect(unrelated).toBeNull(); + + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + const summaryQuery = await executeCompiledBashCommand( + { query: vi.fn() } as any, + "memory", + "sessions", + "psql 
-At -F '|' -c \"SELECT * FROM memory\"", + ); + expect(summaryQuery).toBeNull(); + + restorePsqlMode(); + }); + + it("executes facts-and-sessions-only psql queries without summary or graph helper queries", async () => { + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + + const query = vi.fn(async (sql: string) => { + expect(sql).not.toContain('FROM "memory_actual"'); + expect(sql).not.toContain('FROM "graph_nodes'); + expect(sql).not.toContain('FROM "graph_edges'); + expect(sql).not.toContain("__hm_graph_candidates"); + expect(sql).toContain('FROM "memory_facts_actual"'); + return [ + { fact_id: "f1", subject_name: "Caroline", predicate: "home_country", object_name: "Sweden" }, + ]; + }); + + const output = await executeCompiledBashCommand( + { query } as any, + "memory_actual", + "sessions_actual", + "psql -At -F '|' -c \"SELECT fact_id, subject_name, predicate, object_name FROM memory_facts WHERE subject_name ILIKE '%Caroline%' LIMIT 1\"", + ); + + expect(output).toBe("f1|Caroline|home_country|Sweden"); + expect(query).toHaveBeenCalledTimes(1); restorePsqlMode(); }); diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 6c4623a..8bfd2b3 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect, vi } from "vitest"; +import { HarrierEmbedder } from "../../src/embeddings/harrier.js"; import { buildGrepSearchOptions, buildSummaryBm25QueryText, @@ -655,6 +656,68 @@ describe("searchDeeplakeTables", () => { expect(sql).toContain('FROM "sessions"'); }); + it("uses vector similarity on embedding columns when retrieval mode is embedding", async () => { + const prev = process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + process.env.HIVEMIND_GREP_RETRIEVAL_MODE = "embedding"; + const embedSpy = vi.spyOn(HarrierEmbedder.prototype, "embedQueries").mockResolvedValue([[0.25, -0.5]]); + try { + const api = mockApi([]); + await 
searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + queryText: "book novel literature", + bm25QueryText: "book novel literature", + limit: 50, + }); + const sql = api.query.mock.calls[0][0] as string; + expect(embedSpy).toHaveBeenCalledWith(["book novel literature"]); + expect(sql).toContain("embedding <#> ARRAY[0.25, -0.5]::float4[]"); + expect(sql).not.toContain("summary::text ILIKE"); + expect(sql).not.toContain("message::text ILIKE"); + } finally { + embedSpy.mockRestore(); + if (prev === undefined) delete process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + else process.env.HIVEMIND_GREP_RETRIEVAL_MODE = prev; + } + }); + + it("uses deeplake hybrid record scoring when retrieval mode is hybrid", async () => { + const prevMode = process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + const prevVector = process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT; + const prevText = process.env.HIVEMIND_HYBRID_TEXT_WEIGHT; + process.env.HIVEMIND_GREP_RETRIEVAL_MODE = "hybrid"; + process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT = "0.6"; + process.env.HIVEMIND_HYBRID_TEXT_WEIGHT = "0.4"; + const embedSpy = vi.spyOn(HarrierEmbedder.prototype, "embedQueries").mockResolvedValue([[0.1, 0.2, 0.3]]); + try { + const api = mockApi([]); + await searchDeeplakeTables(api, "memory", "sessions", { + pathFilter: "", + contentScanOnly: false, + likeOp: "ILIKE", + escapedPattern: "book", + queryText: "book novel literature", + bm25QueryText: "book novel literature", + limit: 50, + }); + const sql = api.query.mock.calls[0][0] as string; + expect(embedSpy).toHaveBeenCalledWith(["book novel literature"]); + expect(sql).toContain("deeplake_hybrid_record"); + expect(sql).toContain("0.6, 0.4"); + expect(sql).toContain("ARRAY[0.10000000149011612"); + } finally { + embedSpy.mockRestore(); + if (prevMode === undefined) delete process.env.HIVEMIND_GREP_RETRIEVAL_MODE; + else process.env.HIVEMIND_GREP_RETRIEVAL_MODE = prevMode; + if (prevVector === 
undefined) delete process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT; + else process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT = prevVector; + if (prevText === undefined) delete process.env.HIVEMIND_HYBRID_TEXT_WEIGHT; + else process.env.HIVEMIND_HYBRID_TEXT_WEIGHT = prevText; + } + }); + it("skips LIKE filter when contentScanOnly is true (regex-in-memory mode)", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index b6ab1f9..bbe0e4b 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -778,6 +778,33 @@ describe("claude session start source", () => { } }); + it("switches to facts-and-sessions-only sql guidance when that env flag is set", () => { + const prevPsql = process.env.HIVEMIND_PSQL_MODE; + const prevFactsSessions = process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + try { + const context = buildSessionStartAdditionalContext({ + authCommand: "/tmp/auth-login.js", + creds: baseCreds, + currentVersion: null, + latestVersion: null, + }); + expect(context).toContain("The summary and graph tables are intentionally unavailable in this mode."); + expect(context).toContain("sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message)"); + expect(context).toContain("memory_facts(path, fact_id, subject_entity_id"); + expect(context).toContain("memory_entities(path, entity_id, canonical_name"); + expect(context).toContain("fact_entity_links(path, link_id, fact_id"); + expect(context).not.toContain("memory(path, summary"); + expect(context).not.toContain("Graph-backed entity and relation resolution is applied automatically"); + } finally { + if (prevPsql === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prevPsql; + if 
(prevFactsSessions === undefined) delete process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + else process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = prevFactsSessions; + } + }); + it("logs authenticated startup without backfilling when the username is already present", async () => { const logFn = vi.fn(); const save = vi.fn(); diff --git a/claude-code/tests/memory-facts.test.ts b/claude-code/tests/memory-facts.test.ts index c3889de..a3da0b8 100644 --- a/claude-code/tests/memory-facts.test.ts +++ b/claude-code/tests/memory-facts.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "vitest"; import { + buildMemoryFactTranscript, buildMemoryFactPrompt, parseMemoryFactExtraction, replaceSessionFacts, @@ -15,16 +16,36 @@ describe("memory-facts", () => { expect(extraction.facts[0].confidence).toBe(0.92); }); - it("builds a fact prompt with summary and source metadata", () => { + it("builds a transcript-backed fact prompt with source metadata", () => { const prompt = buildMemoryFactPrompt({ - summaryText: "# Session\n- **Source**: /sessions/x.json", + transcriptText: "[turn=1 | time=2023-05-07 | speaker=Caroline] I moved from Sweden.", sessionId: "sess-1", sourcePath: "/sessions/x.json", project: "proj", }); expect(prompt).toContain("SESSION ID: sess-1"); expect(prompt).toContain("SOURCE PATH: /sessions/x.json"); - expect(prompt).toContain("SUMMARY MARKDOWN:"); + expect(prompt).toContain("TRANSCRIPT ROWS:"); + expect(prompt).toContain("speaker=Caroline"); + }); + + it("formats transcript rows for fact extraction", () => { + const transcript = buildMemoryFactTranscript([ + { + turnIndex: 1, + speaker: "Caroline", + text: "I moved from Sweden four years ago.", + sourceDateTime: "2023-05-07", + }, + { + turnIndex: 2, + speaker: "Caroline", + text: "I'm a transgender woman.", + turnSummary: "Caroline shares her identity.", + }, + ]); + expect(transcript).toContain("[turn=1 | time=2023-05-07 | speaker=Caroline] I moved from Sweden four years ago."); + 
expect(transcript).toContain("summary: Caroline shares her identity."); }); it("replaces per-session fact rows and upserts canonical entities", async () => { diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index e8b7bfa..84c3aaa 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -241,25 +241,28 @@ Rules: // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID3 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. 
-- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 7502520..7bc0074 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -38,12 +38,12 @@ function loadConfig() { return null; } } - const env = process.env; - if (!env.HIVEMIND_TOKEN && env.DEEPLAKE_TOKEN) { + const env2 = process.env; + if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env.HIVEMIND_TOKEN ?? env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env.HIVEMIND_ORG_ID ?? env.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? 
creds?.orgId; if (!token || !orgId) return null; return { @@ -51,16 +51,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env.HIVEMIND_WORKSPACE_ID ?? env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env.HIVEMIND_API_URL ?? env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env.HIVEMIND_TABLE ?? env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env.HIVEMIND_SESSIONS_TABLE ?? env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - graphNodesTableName: env.HIVEMIND_GRAPH_NODES_TABLE ?? env.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", - graphEdgesTableName: env.HIVEMIND_GRAPH_EDGES_TABLE ?? env.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", - factsTableName: env.HIVEMIND_FACTS_TABLE ?? env.DEEPLAKE_FACTS_TABLE ?? "memory_facts", - entitiesTableName: env.HIVEMIND_ENTITIES_TABLE ?? env.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", - factEntityLinksTableName: env.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", - memoryPath: env.HIVEMIND_MEMORY_PATH ?? env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? 
"memory_entities", + factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") }; } @@ -674,11 +674,144 @@ var DeeplakeApi = class { } }; +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 
0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? 
"Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? "cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? 
""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} function isIndexDisabled() { const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); @@ -691,9 +824,56 @@ function isPsqlMode() { const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? 
DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function escapeRegexLiteral(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -908,7 +1088,7 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; const ignoreCase = likeOp === "ILIKE"; @@ -920,7 +1100,11 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const fallbackSessFilter = likeSessFilter; const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); - const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { @@ -932,6 +1116,25 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const buildSemanticCombinedQuery = () => { + const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); + const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const rows2 = await api.query(buildSemanticCombinedQuery()); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); @@ -1026,6 +1229,7 @@ function buildGrepSearchOptions(params, targetPath) { const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { @@ -1036,6 +1240,7 @@ function buildGrepSearchOptions(params, targetPath) { regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, bm25QueryText: bm25QueryText ?? 
void 0, limit: DEFAULT_GREP_CANDIDATE_LIMIT }; @@ -1059,6 +1264,12 @@ function buildRegexFilter(column, pattern, ignoreCase) { function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; +} +function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +} function toSqlRegexPattern(pattern, ignoreCase) { if (!pattern) return null; @@ -1831,6 +2042,49 @@ function dedupeRowsByPath(rows) { } // dist/src/hooks/bash-command-compiler.js +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID2 = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT2 = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT2 = 0.3; +var summaryRetrievalEmbedder = null; +function envString2(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag2(...names) { + const raw = envString2(...names) ?? 
""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber2(fallback, ...names) { + const raw = envString2(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getSummaryRetrievalEmbedder() { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString2("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID2, + device: envString2("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString2("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString2("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag2("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return summaryRetrievalEmbedder; +} +function sqlFloat4Array2(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -2069,22 +2323,64 @@ function extractPsqlQueryFromCommand(cmd) { function normalizeSqlRef(ref) { return ref.replace(/\s+/g, "").replace(/"/g, "").toLowerCase(); } -var INTERCEPTED_SQL_REFS = /* @__PURE__ */ new Set([ - "memory", - "sessions", - "graph_nodes", - "graph_edges", - "memory_facts", - "memory_entities", - "fact_entity_links", - "hivemind.memory", - "hivemind.sessions", - "hivemind.graph_nodes", - "hivemind.graph_edges", - "hivemind.memory_facts", - "hivemind.memory_entities", - 
"hivemind.fact_entity_links" -]); +function deriveSiblingTableName(tableName, expectedBase, targetBase) { + if (tableName === expectedBase) + return null; + if (!tableName.startsWith(expectedBase)) + return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} +function resolveInterceptedTableNames(memoryTable, sessionsTable) { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links") + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links") + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? "graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? 
"fact_entity_links" + }; +} +function getInterceptedSqlRefs() { + if (isFactsSessionsOnlyPsqlMode()) { + return /* @__PURE__ */ new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); + } + return /* @__PURE__ */ new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links" + ]); +} function extractSqlTableRefs(query) { const refs = []; const regex = /\b(?:from|join)\s+((?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\.\s*(?:"[^"]+"|[a-zA-Z_][a-zA-Z0-9_]*))?)/gi; @@ -2095,11 +2391,13 @@ function extractSqlTableRefs(query) { return refs; } function queryReferencesInterceptedTables(query) { - return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); } function queryUsesOnlyInterceptedTables(query) { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => interceptedRefs.has(ref)); } function parsePsqlSegment(pipeline, tokens) { if (tokens[0] !== "psql" || !isPsqlMode()) @@ -2145,13 +2443,19 @@ function parsePsqlSegment(pipeline, tokens) { } return { kind: "psql", query, lineLimit, tuplesOnly, fieldSeparator }; } -function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links") { +function normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { let sql = query.trim().replace(/;+\s*$/, ""); - sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM 
"${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + sql = sql.replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`).replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`).replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN 
"${entitiesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql.replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`).replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`).replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); + } return sql; } -function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? 
"fact_entity_links") { +function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } const sql = normalizePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable); @@ -2160,14 +2464,16 @@ function validatePsqlQuery(query, memoryTable, sessionsTable, graphNodesTable = throw new Error("psql mode only supports SELECT queries"); } const allowedTables = /* @__PURE__ */ new Set([ - memoryTable, sessionsTable, - graphNodesTable, - graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference an intercepted hivemind memory table"); @@ -2257,6 +2563,140 @@ async function fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms } return expanded; } +function splitDelimitedField(value) { + if (typeof value !== "string") 
+ return []; + return value.split(",").map((item) => item.trim()).filter(Boolean); +} +function extractSessionIdFromPath(value) { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} +function extractSummarySourcePath(summary) { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} +function addHybridCandidate(map, candidate) { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? ""; + if (!sessionId && !sourcePath) + return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: /* @__PURE__ */ new Set([candidate.signal]) + }); +} +async function fetchEntityResolution(api, entitiesTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms.map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`).join(" OR "); + const sql = `SELECT entity_id, source_session_ids, source_paths, search_text, search_text <#> '${phrase}' AS score FROM "${entitiesTable}" WHERE ${where} ORDER BY score ASC LIMIT 8`; + const rows = await api.query(sql); + const entityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? 
row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) + entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? `/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity" + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} +async function fetchFactCandidates(api, factsTable, terms, entityIds) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) + return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms).map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 ? `(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? 
`(${topicClauses.join(" OR ")})` : "" + ].filter(Boolean); + if (whereParts.length === 0) + return { entityIds: [], candidates: [] }; + const sql = `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score FROM "${factsTable}" WHERE ${whereParts.join(" AND ")} ORDER BY score ASC LIMIT 16`; + const rows = await api.query(sql); + const relatedEntityIds = []; + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"]) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) + relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ? row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact" + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} +async function fetchSummaryCandidates(api, memoryTable, terms) { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) + return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + let sql; + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) + return []; + const queryVectorSql = sqlFloat4Array2(queryEmbedding); + sql = retrievalMode === "hybrid" ? 
`SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + } else { + const phraseSql = sqlStr(phrase); + sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score ASC LIMIT 8`; + } + const rows = await api.query(sql); + const candidateMap = /* @__PURE__ */ new Map(); + for (const row of rows) { + const path = typeof row["path"] === "string" ? row["path"] : ""; + const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? 
`/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary" + }); + } + return [...candidateMap.values()]; +} function prependCtes(sql, ctes) { if (ctes.length === 0) return sql; @@ -2265,32 +2705,70 @@ function prependCtes(sql, ctes) { } return `WITH ${ctes.join(", ")} ${sql}`; } -function rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias) { +function rewriteQueryWithRestrictedTables(sql, aliases) { let rewritten = sql; - if (restrictedMemoryAlias) { - const memoryPattern = escapeRegex2(memoryTable); - rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex2(aliases.memoryTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex2(aliases.sessionsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex2(aliases.factsTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); } - if 
(restrictedSessionsAlias) { - const sessionsPattern = escapeRegex2(sessionsTable); - rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex2(aliases.entitiesTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); + } + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex2(aliases.factEntityLinksTable); + rewritten = rewritten.replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"), `FROM "${aliases.restrictedLinksAlias}"`).replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); } return rewritten; } -async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable) { +async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessionsTable, graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable) { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { return sql; } const refs = extractSqlTableRefs(sql); const touchesMemory2 = refs.some((ref) => ref === 
normalizeSqlRef(memoryTable)); const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); - if (!touchesMemory2 && !touchesSessions) + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory2 && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) return sql; const terms = extractSqlSearchTerms(sql); if (terms.length === 0) return sql; - const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); - if (candidates.length === 0 || candidates.length > 16) + const candidateMap = /* @__PURE__ */ new Map(); + const graphCandidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + const candidateEntityIds = [.../* @__PURE__ */ new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()].sort((a, b) => b.score - a.score || b.signals.size - a.signals.size).slice(0, 12); + if (candidates.length === 0) + return sql; 
+ if (candidates.length > 16) return sql; const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')`); const ctes = [ @@ -2298,6 +2776,12 @@ async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessions ]; let restrictedMemoryAlias = null; let restrictedSessionsAlias = null; + let restrictedFactsAlias = null; + let restrictedEntitiesAlias = null; + let restrictedLinksAlias = null; + if (candidateEntityIds.length > 0) { + ctes.push(`__hm_entity_candidates(entity_id) AS (VALUES ${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`); + } if (touchesMemory2) { restrictedMemoryAlias = "__hm_memory"; ctes.push(`"${restrictedMemoryAlias}" AS ( SELECT * FROM "${memoryTable}" m WHERE EXISTS ( SELECT 1 FROM __hm_graph_candidates gc WHERE (gc.source_path <> '' AND m.summary ILIKE '%' || gc.source_path || '%') OR (gc.source_session_id <> '' AND m.path ILIKE '%' || gc.source_session_id || '%') ))`); @@ -2306,7 +2790,30 @@ async function applyGraphRestrictionsToPsqlQuery(api, sql, memoryTable, sessions restrictedSessionsAlias = "__hm_sessions"; ctes.push(`"${restrictedSessionsAlias}" AS ( SELECT * FROM "${sessionsTable}" s WHERE s.path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> ''))`); } - return prependCtes(rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), ctes); + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push(`"${restrictedFactsAlias}" AS ( SELECT * FROM "${factsTable}" f WHERE ( f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? 
` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates) OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + ` ))`); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push(`"${restrictedEntitiesAlias}" AS ( SELECT * FROM "${entitiesTable}" e WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates))`); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push(`"${restrictedLinksAlias}" AS ( SELECT * FROM "${factEntityLinksTable}" l WHERE ( l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '') OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + (candidateEntityIds.length > 0 ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` : "") + (touchesFacts ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` : "") + ` ))`); + } + return prependCtes(rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias + }), ctes); } function formatPsqlValue(value) { if (value === null || value === void 0) @@ -2610,8 +3117,7 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd, continue; } if (segment.kind === "psql") { - const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; - const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges"; + const { graphNodesTable, graphEdgesTable } = resolveInterceptedTableNames(memoryTable, sessionsTable); const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); const prepared = await applyGraphRestrictionsToPsqlQuery(api, validated, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); const rows = await api.query(prepared); diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index 6707ee9..4945d2b 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -79,6 +79,10 @@ function isPsqlMode() { const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function isFactsSessionsOnlyPsqlMode() { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} // dist/src/hooks/version-check.js import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "node:fs"; @@ -236,10 +240,58 @@ Answer rules: - For "likely", "would", or profile questions, a concise inference from strong summary evidence is allowed even if the exact final phrase is not quoted verbatim. Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; +var CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. 
+ +Available tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Resolve the named person, project, place, or organization with memory_entities. +2. Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Use sessions for transcript grounding and final answer verification. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Facts are for narrowing and aggregation; sessions are for the final exact answer. 
+ +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. 
+- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row. + +Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode.`; function buildCodexSessionStartContext(args) { const versionNotice = args.currentVersion ? ` Hivemind v${args.currentVersion}` : ""; - const template = isPsqlMode() ? CODEX_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CODEX_SESSION_START_CONTEXT_NO_INDEX : CODEX_SESSION_START_CONTEXT; + const template = isPsqlMode() ? isFactsSessionsOnlyPsqlMode() ? CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY : CODEX_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() ? CODEX_SESSION_START_CONTEXT_NO_INDEX : CODEX_SESSION_START_CONTEXT; return args.creds?.token ? `${template} Logged in to Deeplake as org: ${args.creds.orgName ?? args.creds.orgId} (workspace: ${args.creds.workspaceId ?? "default"})${versionNotice}` : `${template} Not logged in to Deeplake. 
Run: node "${args.authCommand}" login${versionNotice}`; @@ -285,6 +337,7 @@ export { CODEX_SESSION_START_CONTEXT, CODEX_SESSION_START_CONTEXT_NO_INDEX, CODEX_SESSION_START_CONTEXT_PSQL, + CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY, CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY, buildCodexSessionStartContext, runCodexSessionStartHook diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 819d244..2e88d4f 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66735,12 +66735,12 @@ function loadConfig() { return null; } } - const env2 = process.env; - if (!env2.HIVEMIND_TOKEN && env2.DEEPLAKE_TOKEN) { + const env3 = process.env; + if (!env3.HIVEMIND_TOKEN && env3.DEEPLAKE_TOKEN) { process.stderr.write("[hivemind] DEEPLAKE_* env vars are deprecated; use HIVEMIND_* instead\n"); } - const token = env2.HIVEMIND_TOKEN ?? env2.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = env2.HIVEMIND_ORG_ID ?? env2.DEEPLAKE_ORG_ID ?? creds?.orgId; + const token = env3.HIVEMIND_TOKEN ?? env3.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = env3.HIVEMIND_ORG_ID ?? env3.DEEPLAKE_ORG_ID ?? creds?.orgId; if (!token || !orgId) return null; return { @@ -66748,16 +66748,16 @@ function loadConfig() { orgId, orgName: creds?.orgName ?? orgId, userName: creds?.userName || userInfo().username || "unknown", - workspaceId: env2.HIVEMIND_WORKSPACE_ID ?? env2.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: env2.HIVEMIND_API_URL ?? env2.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: env2.HIVEMIND_TABLE ?? env2.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: env2.HIVEMIND_SESSIONS_TABLE ?? env2.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - graphNodesTableName: env2.HIVEMIND_GRAPH_NODES_TABLE ?? env2.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", - graphEdgesTableName: env2.HIVEMIND_GRAPH_EDGES_TABLE ?? env2.DEEPLAKE_GRAPH_EDGES_TABLE ?? 
"graph_edges", - factsTableName: env2.HIVEMIND_FACTS_TABLE ?? env2.DEEPLAKE_FACTS_TABLE ?? "memory_facts", - entitiesTableName: env2.HIVEMIND_ENTITIES_TABLE ?? env2.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", - factEntityLinksTableName: env2.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env2.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", - memoryPath: env2.HIVEMIND_MEMORY_PATH ?? env2.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") + workspaceId: env3.HIVEMIND_WORKSPACE_ID ?? env3.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: env3.HIVEMIND_API_URL ?? env3.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: env3.HIVEMIND_TABLE ?? env3.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: env3.HIVEMIND_SESSIONS_TABLE ?? env3.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + graphNodesTableName: env3.HIVEMIND_GRAPH_NODES_TABLE ?? env3.DEEPLAKE_GRAPH_NODES_TABLE ?? "graph_nodes", + graphEdgesTableName: env3.HIVEMIND_GRAPH_EDGES_TABLE ?? env3.DEEPLAKE_GRAPH_EDGES_TABLE ?? "graph_edges", + factsTableName: env3.HIVEMIND_FACTS_TABLE ?? env3.DEEPLAKE_FACTS_TABLE ?? "memory_facts", + entitiesTableName: env3.HIVEMIND_ENTITIES_TABLE ?? env3.DEEPLAKE_ENTITIES_TABLE ?? "memory_entities", + factEntityLinksTableName: env3.HIVEMIND_FACT_ENTITY_LINKS_TABLE ?? env3.DEEPLAKE_FACT_ENTITY_LINKS_TABLE ?? "fact_entity_links", + memoryPath: env3.HIVEMIND_MEMORY_PATH ?? env3.DEEPLAKE_MEMORY_PATH ?? join4(home, ".deeplake", "memory") }; } @@ -67375,11 +67375,144 @@ var DeeplakeApi = class { import { basename as basename5, posix } from "node:path"; import { randomUUID as randomUUID2 } from "node:crypto"; +// dist/src/embeddings/harrier.js +import { AutoModel, AutoTokenizer, LogLevel, env } from "@huggingface/transformers"; +var DEFAULT_MODEL_ID = "onnx-community/harrier-oss-v1-0.6b-ONNX"; +var DEFAULT_DOCUMENT_BATCH_SIZE = 8; +var DEFAULT_MAX_LENGTH = 32768; +function toNumber(value) { + return typeof value === "bigint" ? 
Number(value) : Number(value ?? 0); +} +function tensorToRows(tensor) { + const [batchSize, width] = tensor.dims; + const rows = []; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + const offset = batchIndex * width; + const row = []; + for (let hiddenIndex = 0; hiddenIndex < width; hiddenIndex++) { + row.push(Number(tensor.data[offset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function l2Normalize(rows) { + return rows.map((row) => { + let sumSquares = 0; + for (const value of row) + sumSquares += value * value; + const norm = Math.sqrt(sumSquares) || 1; + return row.map((value) => value / norm); + }); +} +function lastTokenPool(outputs, attentionMask) { + const [batchSize, sequenceLength, hiddenSize] = outputs.dims; + const rows = []; + const maskData = attentionMask.data; + const hiddenData = outputs.data; + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + let lastTokenIndex = sequenceLength - 1; + for (let tokenIndex = sequenceLength - 1; tokenIndex >= 0; tokenIndex--) { + const maskOffset = batchIndex * sequenceLength + tokenIndex; + if (toNumber(maskData[maskOffset]) > 0) { + lastTokenIndex = tokenIndex; + break; + } + } + const row = []; + const hiddenOffset = (batchIndex * sequenceLength + lastTokenIndex) * hiddenSize; + for (let hiddenIndex = 0; hiddenIndex < hiddenSize; hiddenIndex++) { + row.push(Number(hiddenData[hiddenOffset + hiddenIndex] ?? 0)); + } + rows.push(row); + } + return rows; +} +function formatQuery(task, query) { + return `Instruct: ${task} +Query: ${query}`; +} +var HarrierEmbedder = class { + modelId; + tokenizerPromise = null; + modelPromise = null; + options; + constructor(options = {}) { + this.modelId = options.modelId ?? DEFAULT_MODEL_ID; + this.options = { + ...options, + maxLength: options.maxLength ?? DEFAULT_MAX_LENGTH, + batchSize: options.batchSize ?? 
DEFAULT_DOCUMENT_BATCH_SIZE + }; + if (options.cacheDir) + env.cacheDir = options.cacheDir; + if (options.localModelPath) + env.localModelPath = options.localModelPath; + env.logLevel = LogLevel.ERROR; + } + async embedDocuments(texts) { + return this.embedInternal(texts); + } + async embedQueries(texts, options = {}) { + const task = options.task ?? "Given a user query, retrieve relevant memory rows and session events"; + return this.embedInternal(texts.map((text) => formatQuery(task, text))); + } + async load() { + if (!this.tokenizerPromise) { + this.tokenizerPromise = AutoTokenizer.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly + }); + } + if (!this.modelPromise) { + this.modelPromise = AutoModel.from_pretrained(this.modelId, { + local_files_only: this.options.localFilesOnly, + device: this.options.device ?? "cpu", + dtype: this.options.dtype + }); + } + const [tokenizer, model] = await Promise.all([this.tokenizerPromise, this.modelPromise]); + return { tokenizer, model }; + } + async embedInternal(texts) { + if (texts.length === 0) + return []; + const { tokenizer, model } = await this.load(); + const rows = []; + for (let start = 0; start < texts.length; start += this.options.batchSize) { + const batch = texts.slice(start, start + this.options.batchSize); + const inputs = tokenizer(batch, { + padding: true, + truncation: true, + max_length: this.options.maxLength + }); + const outputs = await model(inputs); + const sentenceEmbedding = outputs["sentence_embedding"]; + if (sentenceEmbedding && typeof sentenceEmbedding === "object" && sentenceEmbedding !== null) { + rows.push(...l2Normalize(tensorToRows(sentenceEmbedding))); + continue; + } + const lastHiddenState = outputs["last_hidden_state"]; + const attentionMask = inputs["attention_mask"]; + if (!lastHiddenState || typeof lastHiddenState !== "object" || !attentionMask || typeof attentionMask !== "object") { + throw new Error(`Harrier model "${this.modelId}" did not return a 
usable embedding tensor`); + } + rows.push(...l2Normalize(lastTokenPool(lastHiddenState, attentionMask))); + } + return rows; + } +}; + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } +function getGrepRetrievalMode() { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") + return raw; + return "classic"; +} function isIndexDisabled() { const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); @@ -67391,6 +67524,49 @@ function isSummaryBm25Disabled() { // dist/src/shell/grep-core.js var DEFAULT_GREP_CANDIDATE_LIMIT = Number(process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 500); +var DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +var DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +var DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; +var retrievalEmbedder = null; +function envString(...names) { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) + return value; + } + return void 0; +} +function envFlag(...names) { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} +function envNumber(fallback, ...names) { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} +function getRetrievalEmbedder() { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString("HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", "HIVEMIND_HARRIER_MODEL_ID", "DEEPLAKE_HARRIER_MODEL_ID") ?? 
DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY") + }); + } + return retrievalEmbedder; +} +function sqlFloat4Array(values) { + if (values.length === 0) + throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) + throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} function escapeRegexLiteral(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -67605,7 +67781,7 @@ function buildPathCondition(targetPath) { return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? 
[prefilterPattern] : [] : [escapedPattern]; const ignoreCase = likeOp === "ILIKE"; @@ -67617,7 +67793,11 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const fallbackSessFilter = likeSessFilter; const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); - const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { @@ -67629,6 +67809,25 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const sessQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessFilter} LIMIT ${limit}`; return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) + throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const buildSemanticCombinedQuery = () => { + const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); + const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const rows2 = await api.query(buildSemanticCombinedQuery()); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); @@ -67733,6 +67932,7 @@ function buildGrepSearchOptions(params, targetPath) { const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? normalizedPattern.trim()) || void 0; const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` : hasRegexMeta ? regexBase : void 0; return { @@ -67743,6 +67943,7 @@ function buildGrepSearchOptions(params, targetPath) { regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, bm25QueryText: bm25QueryText ?? 
void 0, limit: DEFAULT_GREP_CANDIDATE_LIMIT }; @@ -67766,6 +67967,12 @@ function buildRegexFilter(column, pattern, ignoreCase) { function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } +function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; +} +function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +} function toSqlRegexPattern(pattern, ignoreCase) { if (!pattern) return null; @@ -69355,8 +69562,8 @@ var YargsParser = class { if (typeof envPrefix === "undefined") return; const prefix = typeof envPrefix === "string" ? 
envPrefix : ""; - const env2 = mixin.env(); - Object.keys(env2).forEach(function(envVar) { + const env3 = mixin.env(); + Object.keys(env3).forEach(function(envVar) { if (prefix === "" || envVar.lastIndexOf(prefix, 0) === 0) { const keys = envVar.split("__").map(function(key, i11) { if (i11 === 0) { @@ -69365,7 +69572,7 @@ var YargsParser = class { return camelCase2(key); }); if ((configOnly && flags.configs[keys.join(".")] || !configOnly) && !hasKey(argv2, keys)) { - setArg(keys.join("."), env2[envVar]); + setArg(keys.join("."), env3[envVar]); } } }); @@ -69676,12 +69883,12 @@ if (nodeVersion) { throw Error(`yargs parser supports a minimum Node.js version of ${minNodeVersion}. Read our version support policy: https://github.com/yargs/yargs-parser#supported-nodejs-versions`); } } -var env = process ? process.env : {}; +var env2 = process ? process.env : {}; var require2 = createRequire ? createRequire(import.meta.url) : void 0; var parser = new YargsParser({ cwd: process.cwd, env: () => { - return env; + return env2; }, format, normalize, diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index cda8f47..cf04f8d 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -725,25 +725,28 @@ Rules: // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID4 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. 
+- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index 99a2b9a..0353dcc 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -342,25 +342,28 @@ ${node.name}`); // dist/src/hooks/memory-facts.js import { randomUUID as randomUUID3 } from "node:crypto"; -var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +var MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. 
Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. +- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; function stripCodeFences2(text) { @@ -540,8 +543,32 @@ function parseMemoryFactExtraction(raw) { }) }; } +function buildMemoryFactTranscript(rows) { + const normalized = rows.map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? 
row.turnIndex : 0, + speaker: normalizeString2(row.speaker), + text: normalizeString2(row.text), + eventType: normalizeString2(row.eventType) || "message", + turnSummary: normalizeString2(row.turnSummary), + sourceDateTime: normalizeString2(row.sourceDateTime) || normalizeString2(row.creationDate) + })).filter((row) => row.text || row.turnSummary); + if (normalized.length === 0) + return "(no transcript rows)"; + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}` + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} function buildMemoryFactPrompt(args) { - return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__SUMMARY_TEXT__/g, args.summaryText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); + return (args.template ?? MEMORY_FACT_PROMPT_TEMPLATE).replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText).replace(/__SESSION_ID__/g, args.sessionId).replace(/__SOURCE_PATH__/g, args.sourcePath).replace(/__PROJECT__/g, args.project); } async function replaceSessionFacts(params) { const ts = params.ts ?? 
(/* @__PURE__ */ new Date()).toISOString(); @@ -730,7 +757,7 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); + const rows = await query(`SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc2(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; @@ -820,8 +847,17 @@ async function main() { wlog(`graph update failed: ${e.message}`); } try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? 
row["creation_date"] : "" + }))); const factPrompt = buildMemoryFactPrompt({ - summaryText: text, + transcriptText, sessionId: cfg.sessionId, sourcePath: jsonlServerPath, project: cfg.project, diff --git a/esbuild.config.mjs b/esbuild.config.mjs index 95b2490..e43ad30 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -29,7 +29,7 @@ await build({ platform: "node", format: "esm", outdir: "claude-code/bundle", - external: ["node:*", "node-liblzma", "@mongodb-js/zstd"], + external: ["node:*", "node-liblzma", "@mongodb-js/zstd", "@huggingface/transformers", "onnxruntime-node"], }); for (const h of ccAll) { @@ -63,7 +63,7 @@ await build({ platform: "node", format: "esm", outdir: "codex/bundle", - external: ["node:*", "node-liblzma", "@mongodb-js/zstd"], + external: ["node:*", "node-liblzma", "@mongodb-js/zstd", "@huggingface/transformers", "onnxruntime-node"], }); for (const h of codexAll) { diff --git a/scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc b/scripts/__pycache__/backfill_harrier_embeddings.cpython-312.pyc deleted file mode 100644 index e4d1d656a257b1bab68e50e3afe5908bf40cee31..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60971 zcmcG%3w%^ZdMDcN_fu-A^@Ii?gai_A#2X7qfOrUugfSAhEp%FhyU?w2bF&_JHbC*ZWj`_0?D3_tjVb!RfSV@Z5d$cj21%G@8GmAJStKJ=#ZcgGO^h z!)Z8eMANVB&}zxAi|9IZ?5*$6v$vtcz~06VBYT@VOzds$FtfL%!-BUyV(qte*jO2c zh`rz5VefZzIM{Dv#M$raa529r;_ml!c$nWD@%HC+)^`puEM(EM?liX)?;7q57r=Wp_Z+tj?=@VITaI@v_dHjD zcOBQot-yONcb2QfdtJz;)9lx9D__xYRqyJ=UVk?`5FP8eZf+G)ZD47tk)|fJ@m;+b z-LZ*PV+$AJ>hP}TdbqWCZ{>Qqb$D;%&T;GU-p+-&4S4TByEfvz z6YouU@8bHn&3NzT&U0Jv-or(>dc2?F`nj!mH*f>oHoW(8gWPt!8@VBF2i{G1@5H;A zdx6`9_dag-E1Hh|+#a|GIG%e7VJ%#gYry*;YxiEXyHRfUA>?a9++i-pHRFAR8|L=m z-O63y_Tzn&yT~2D`xtkLYr*?z?lN}}@8evYJA`+eRg?Q4_ab)~@$I=|tr{-D9YOqw zMdL@fR>VKUeTh4Y_sK=my~G_u{Id(=xl!(Egr8a%uKvw^nLCa&9o)-Y8{VDVSGab( zPjjzuC-6SQ{Q~z4-p_@C+{yE+G#cKCIrKdKo|7O&g6x;Imvz;@_z?j}wKik#8WHy=CPaH#pQsCapQ 
zxSJo04)(;>p6lZIaEPxRj7DoOtgqcrzpg*-KNpJ)MYpeA+cgxf<3gdKNZ0vLU03)c z3qItdr{IJ&qZ*B>@~2NUA3qgrZ#mL@^hA5`NSmti$D)ku*Wu|!DH_dF*Pi-_=8yPG z^I*?z!rCAG9`M~qpOEw4$@wWcpTemw%$U0e`}?~FxQw+Y90>)7&_5XibIovLA$}lZ z8sZ20hhiCLbogvnZ)hMEj9nfIWsK-C{&Ge?%tta#3`FSc;NW?&0NY@M3r1snc%V09 z8wg#L{HSaoh5|Ci==pFYk}-7gz0r&*))hV9!evZR4CZh&V@5rFq3&45&V{1ge0YdP zIHQXVX3YJeXcSGLBxp5ixA%8l3i6@oa3mJpU50PX7`g|!P{trO@$BVTD4H=2i&pUb#eJxxlBJOhJAhhsy-vF1xd=wFoS%{FB)av~aP92`cU_%DQbREY0EueT0! zy`k1n2B7v9O(fWpaHOHA02?_h9)M3Yx=T?UJih#FH1hC!xEa`01@$~PMY3Y z&1Frst~H~zRqHeQ=w*OLG{&LdGRBL1IED$#uSWhTH6rlYQ)|&?KDw6cx)6@7-H*R> z;R~Vu@Bp_~gqrBuvt8Zi0oIXVc2)*M{bxfQ2e6FR4PDNdLPG%282>cNJ3|GRz!}xd z8O&cj_~nDM24B+Pn=p@Gp45#$_km%>BgAUD$&ntp2i`^qdO+@-Ts;t@)a>aqNj~K* z^+BIi36p%B_O{$deRe5Sc}r+AXI?#Lkd~n~O|`L=r@;Zrvx_*30Z-htdfmG1>(*to z9n~hDMk=Ep7`(_+J2Amy-Gm?+FlVe`pt(W5zbkf2!yiXHe~c2Dkvxbfz7xN?@E6?y z2YqSJzqa$r&YPC6JKk`l?3J_j+N8bq&N9JXo3ihmweL&X_oeIyX6?t4_G7}+r-aVu zQud%=2(q$sgWEootnTcDCME-%^r^fjkrU*ku4)6|wP|hv*ffAOTR0jXz|WXUaB=ZN9eS%8u7| z35M-=bZ|cBQ}%w5PthebZJLB;RC`$)%MwQ#{FWf5O~VuLR_pj6Ui|aqoQ0!qw5cmf zIGFE2c$nfU;GoTB?|9|S{26n3+U*;2q|J`ek+j=8XLpSazqVa46o^fidl#7u_+yj) z8F+3`vvdjcq*Vp{O4CMk2_2_B--H&yhmSzY3e{+>(C|hrLc4XR@QD{Sm-MGJ7qu7_ z5lD3zJ*H>I_6&_dGh3?lJWarirKc+rq1lbasf&+>xF~f@AP^A8D31jOhX$kJ7;r+6 zCV4*+ynw%GBb-r9_FMg9?uR6(He`=ZvU(9hkE!vi#kxwFWO5qziwZpe@DAYKMz=G4zX@J438REb76+upsJqr z%2-$HTR$@n$9ihF0y-_B0jyU*XMkZtUlgmPF*4YNwZO`Uy0~Br>lkn*VKp@{SSd}A zlJmoedXb{DFl={U+kR#HICo><`oL80_&};~OUhm^80sGf1T(U+FZ=`^0)#%i7e0M7 zEDIh^%f2iNR2cr86BVI3uq3CCqco{E2)uIPyqJt5`)H6BJP9plKnsl0MuRg^nVk8y zMM55EldjLbARB%vfX!AaYis*0T?S+l4_X3bLg-RB8Ve4dj~nV@{X-wo2Z9haVL>8V z!wTdW3!V>Mj`9Q({2)0)l*Yn(t!{8{(XqXp@bTEUZhw%K{!xD!z z5aXSNxXl{CjkZa1X0=8$qQ^)RBrkYk@+#cN7$Wnd6MD{c+bosGS=2D8)m*JL03CAd zpOl8+r9!hF)9@m;>ZdeX{JWe*ch!I+{vv9~XfG;Iu@BSR0Rj=^T(B$J9S$?7!RkDM zl>C<{xnXc9G{7IF-vki+%W$dP>ArqjHqA4 zUz8S-#eu-;yjFarczolef2?@MT0Qm55A*$FjcG^TwZm5q-}HUGNU2SS!;FDTK!p+rsg--H>}aWzsA_G**p(v$|xm+L^rUB8PNg_ 
z$V3$avky>+?zUd)TsELO4CppCp!!@$M_TtW;QFZcYGFc~iy)23pwLGkx!_6YCB!4W zG1xnD4Ys(>%^z+G6W6zD14vw~W++}n07m{*@)OyVF?9`LR5@`(qX@~4DE}HIxk^b4 z5!#H*A5LsGvN%1$zm7z|gum!nI2g@k6{Ah*$H&fBGS-yN_utrlef#S>$C^IO_oqEY zX@}?9p(}^7vw2ghXw?Uf>YrFO1*L2hA38J+&*-6_Ix(VIzc{47wx=PWe?MSssF^>c z)Jit9Tj6=qI>=$Tr9q9lVzh>`hXV!l8#-m5@G{bYmese$3p*QXJGyG)!82LUy4r2Q zGd1y|mH`lMVJ^`2^x;65@GtG5`0W8KlsX=}x1Qlh1T<{-Ii8j+%r(P67dDqFI?oz? zArfR+`8SYc0)NqJI7HJriKZ2b)=zJmv2UGo6$zyqr<B!_?!5Pz5-`d^T6V~=Dgw@=O*^0EEPYp zdan4#^T)ZAwN&&MPUNMmWk0g!T`B%n`*$kuYVWT9qt&U+`|d5jclKUUa`V$O?a$7f zdQJ#-3EiR8sh(7OZ*p_5aDG7O8@#`HFqJ=)+&nmC9dDV4T|X*p9!%yBrK~SJv>J`3 zpM7l6_+HRPi3ECogKe+gG7nM8>*YTnUP6>WfanCGi4wZ=_4taigihI!a&o5=i!JJ! z7bov78T%~{$T0`ta5DM>+S`&ljS5s`&`!vuOT#X`QBH?J18U5eYYk4wEe4ms^rjv& z-rPz%f}Xgc7H&qnE^Y#ouHjVYXSy|kxc$ub?LDA=Vw*b8R2woz=I5`YhcX82UU^W+ z7<6#EG{jlGl5sRQ!`N@&zdqwF)uKC|qz$UZCiF59Z@8B^onKu@@rZ7Vaz z7%{Cfra`_Js4-)qH{s2Ul{i(R*Ku)>KOe;o8QU(f+Q9M*5{J;yAL<|EF9%~?XF-s- zqF}ZW-&XwHit(}pS^i)!V+#iR2f1NzBZC=7F!;i7SA;6TFLyAg;`s)Hd*K4J!dbe#eXEk^Dfd?vTzSATD9oN zt+Pm#%7mUOl1Cvoj1{ZW)0gBfdl^FV5#5(`7qt?*t2)pcx5{FnxThNod?Hx8 zBGEd=q>(oYanHUkNC~(=Y%st$T!GJY+XESwEVF7qdZ@WI^J{^-~%#lInmMM z`$?STmQ!T|jKJnCT)g#27^r!mH_((7R0Y%x3LGCE2s|&zumX`m$S$4_4Dx|!C>D^b zs0oAyE(9)sGgp@}qu5qjBP_H=FrVQ>Tl4XZd3fMFxW)sS0y*EwqsI@m9cyT8rUn+s z4V)Kjm)94PMTdVL1EJdpRYS??YS>0dkw@F;8 zGJd(tw&u3BmZPn0!FHCwFBiQa!Xhne)fU$4{J%rj^M63j9df=+&L5KVN9245PPLOK zte-KQ8$=(O7!N6`?Ed%|MY<2R9c^s_tDY0tpcz}+b>}RH}w7R`(feO$<)4QQH#@a?c|k{Lcz+Ev+BOHb86#z z^|$JUjZLZ5&G>n*;4h1RP%N}{q*^*h_kC#hykVmKT5&k#>Juz|Kel_*uKcvSAnh%r z@0i$@@>bsWuAM4O=lO3`U$34xn98fVpVxYO!+YCrZGUg~t=)H4rdBr~+cyNqJV^_r#{6?_IogQK)Z8tv!gJ_dlcNkTT}jR_x=5M+P-`8+b4zQ=Tf_Z_$hSv zf8w<|Ob<0yqv_L6@--gcth*xVt^jkJ#wV{#Sh{xFHkG)u;ZFCR6GC}oDzE8Yh2S{y zuV8v3#b;4kKL5AM{W~?^-Sl9AIv8Nqm2j|pR!8rm=v?v~L%N~GfPr+sve6`ue@f`l*P1D0a8 zdMSk7$_5N+v#dz4l=b)}rv^!)34+KJACMiIpiDAmMWbuM0%Qo?#4{O{h)x)uA?yO7 zFSH#7hhxF0C>F+77^0y_k2t>>?vWGo{|z}Fo9-aQ!vo=1Fc|l#KEF;1{cFmz0nVuA 
zp+{3#I%c?POZ%7H_|o++O?IdJHTdmF7nb2=PnWG4Gt5|u(@X2d3?Ep^#J!CC?I;7p zvDl9#_|7lD9@2*8O<^wn9Vc%!u?@~CK)_ZKJ)VoZhZw_~h7mo7UeCq!#WJqrOo$_Z zUhr_{oB9!GyNwtX0ze@K=QD`0O}~dNfhm?R71HOEe99Yep-UJz>uuY+S$G&RV}qdN zDOA!*zJwXAlL;#+RHM0VmpC=xlv)&)KwpWHM)D;redUs0c}ts=zNJc-6JG)NLef)lG)(*+7h`_$^^We(w?w`$K>eN^@4wNrWifyh*d~s$0{XX z!j^DwuG?-&*dx_=U3ayV@uEZCF*X!yE>NrC0_n7w>uY`qlUN0?y26Wb&OU~6HDgM^M_c1t}+;t$Le z=a-!0+N-|^9Oq;tF@9gvs``uY_JRlf)jHx-eQhCHzjZ+({G^#G)rK$pU)TD-UC{dN zQd`i{u3os81BYN*d;7ah_jiwyS+9Bo8yJl{)TyhNDpE1jOM>!{xL7hOoYqj0JpUc|GWH`4&jyb- zw?pNxE#m^iDiS^my|NdWQlR;`fKJ7cA9WaJJbWO$lonL?`d6zwa5ZS4*1C)$FI&^T;^1{*0a8lw?x!!mh$8`_$K zP(*Av6>Mv6#Acu^W5F(m6_zof4J7<`w2N!MSjF=wz1q$H6IG%IM>AtMJIGz;35D~Y zP@oN}lChxJG&AY?k&>StA59+>9pb-2DSt}NYvlYAIe!BuCWZtT&{OL`1{le2HXY!sMYka6H zcGk=lRLmBvP8O`5YP{o{-F76o?Z^iOt-|qV=9Vn~dfyv;Q@;0#Zxz2=I$OOXS-s=V z{#12SYDx3>eu%{ik7^&3Ec^QEH&#!EQzh%}mu#8dJ8mYL=*8g>4T z!;D7cK+#d{r*rvBXY;F*`PCn5EY6yApyD0NTb6IwW&`V! zf%Vf>cdLcK`c&ZXxO1+2ZMv{_s{6fjx6Vys zXxR58SMRy27iyaBHVD3bN?ZNQ#Ofbfk^D0z>-C*!m0RHdUg?tN z3e)$t>EQn}zp*)B{j)L$`B&v1v>E?wN5i)LyNuu8t|xa_A%gDZH)vYQj6W#WlUwFM z&<|FXA2b^O+-QWqwb~&P($52iMQp-+jHG5hhFqAB;fUWuI+~wQ%N)oUT1gApz&i=Q zu{1vTKJ$^Ns*{jLC$Qz30CbbjoB^3Xe?OD~*FoISOKQPcM8N+u>JXQQ4)Pr*=NLI3 zNzNQR(Zg_F(L8i%mT!CLbQsq@aOXWV;*D|7D|l!opGD*KJ+zX~rtz0Nw3E-FDOt+W zBd5*v$c?a1%BqY_TCpOkqg&p<;D;>aoWKt?Nw3IrNb3S*FbJ;Gzyc!%kj+^_5E~+y zS2kj@l5UW?Y?Me3`G0v3#oxD9R=fkDaSI1p30UbEk$GY@wPc{$;6J7w+f1DV6k|Kb<%RBWx_cbnmj42 zIdHG=o<%rzTxfenIDS$neD(v!sh^lN<;y<>yT!AdcKzND9Uj`-dp~1%e6nPh?vJev zR^$5?E!^BAi~Y-@BdZ!og$Oe!p%^ z&&VLclrX?=ep8lXh+;%}gNjObB;i-`w+T*H@;Q%+79 zIXLu#rI=-BaPruGyf6zHvhK227rzo2DRYz*_|(ub=PVq5G3Bfn-AB;TK3OwaHudz> zi&OnV>CV~GretZ;-L`ubsnXVz_vq-shdQItnYP%kS+7{ru9~UsQ){Ms?wq)@UuZln zoaq)$a{|{RIC}p@ud~`d{luhkEFo1$=Z6*tDMC6wi+WN2w@VueH18K!8UKs3yTPj8T_>j?@-TmxQK1h!Zr0bfo(rFNu2**xHNj(ydhG$t5dF8!C5!ee`o)lZFlzz_QQhVu-Nwrs<;B-VMh|>w&mX@uslEQ<)QY6Pj>VFieu1=KT$OTH z2$qUZ=j`rT`_iO+DJ@9_NcSpRlnTT#d(YQk(7bObXeiUaU#f*WZ_H>Ou`#2iMLlM@ 
zRP;&X6mu(!0Y7i#M9gJgMGA>$s^j|L>GeQZzlu7r3m-m98u}Np=e~geHD27pU8+8+HpsAaj*S1{QGF~&$Gs&gwt7q-&llJu~`$oaA@o}^wf0nq`DwBJ$ z&!SaV`Y^Hamf)&RC{>=Rp4VUW?chIaA3X)nVylHB<;X?aNzjdIaK_^x%7HY<*vk(N z58;p=6kq8qrjhAos{6*WGt$+6mh0LbFYAUR83ZzVNwx51W1poiD?p*6nyC}-b>8Z{ z<9+wJ+1iF=ZNp4$BkL&uER6)e6GMM!IQV$i z%UhqD$f<^?d006ML)hwQl`V>l8{CQ#P(qvKKC9+5c;vbmII7}crwR=HH%MBpjcrRz_BOJWoQK%5CVV~bxt-7^*;}l10zDKtvsPIaaE%gj4f>$Z~M`s zI40bB4Ci(CwKN}YYGbO03J+4G>x^KqbzKx$Yu`k*89h#fi1a2+T(%_?v8Pu&Wf!D# zrl9!IB)%23b>c5d8!!N+Yso}3<*FFnPl&YlW^Bg3V$Pj6UjHk*CeEhZ%LL0Zrl|NM zcVJAPw!6lAlXj+u=1YcyCzf zJ~z`87MAo01?NAoM4+nZS@y`L@oX21t9o^}q~X{&v3XKASwFS$t(}u~LhqzP1xZ6d04{6xxLDj1%=z2d!^TQ$=s?`@u`c^bdPX(Ueyd>Mf^erq578uo3A zOe4Jtg!NCDM*7F65y(*;L>r_pM3Z*GqiDKf8X0&K_L#JW8xq44&hgDUAHAg#OJY$hE^jJt_Q$n8G@x!H4%V?yaehpIh}m_q_I>Dz!7D-(7u zpXptnNjM%r|FBS@9v}fXm&cX9%m}LV{8hk4X zh+A!xbBQCV>%0Jcgv=@rSmN8MDUkIA!*zd5W{7rh-krsH07ZW z$Y^NtWNl;cL^psf9gampOav%WJt9S;XT;FH)^?D`3DnPDfm7|uMdvUHC%+bj@xM(e z*^d5Y@)6rAV?+=1M>Bbh7&_kEb_`0BkXs%yq7+GDN&mX47xFpLLB6TBL)L<-7 zkvt+8;7?FZ)JI~+IH+T=qZCiMioc+k$ZU!@#Qq4f(;fT~%Ggb9GEkc`797%r0GRA( zFk(tB#Be?Yv8R!3zcNO)1&9)UVWil6MTm{$8UpYTf zopP-J#fTEBZf=}0FHO4($6OEcOK$AAzGLD-Du3nZVTxSywqeFxnf6!A`m2)uswr#A zzhTTlgl6RJ#u@vnbV1>GA{%F$-kb8*3x!+n=; z>XJ+9rs8)tq?YU!%J-LGplqWD| z6nO^iAJ~`UB*%F8zI*9UEShC2ADInhd7w6zt(-7?V$_sZO~!7w&6KUb-jpsb|9a6I zMUz#j;+p%#p(&1}pwq+e4&ul6d?cs5*CYhFnTF6$^xA?R?Snw&L$k(P3@$24bZB0{ z&MH-UAvrWGoQu4Da4Pn#wwV>%-e~$x?7Q)A$M3Cxg6GVh?qpfFFf=@F5lS!6naXX~ zw_!-~SKfE5eCXGBHhxl~DX5%$F6G-adL&(1j`Ns>;$OunY0Nj8lN+0br%wo6cxF}K%m6<- za49)(Nx1x_hhT^6j_UAwPS>r2q<;{+i;pp(IVhYK5$h}Z`q8mwFiR$!Nq1$I9zSz; zb8>TwaN?A3u79RxV1~ai%O{e20>yj@s;YP$)wR*|jcU(Pp{vjRFDo?dD;_h9 zwY_S6Sg!G}{7FEwY3tQZqIrU;f*Jc-!O(c;88|=txE>?*vyUB`vWwa%?F0X8dqd$7 zt?B#zbw^C5c{~i-$zpzLAED{kTF`;h+@c6<0TlAnpjzm2r#DJul=KFvXi_+}i_{`- z7#4|_OI7g%a@Q}5+NAokTOOIQ1M(Rg7YMuJrcT;XZs(r|vdU=p#I0u_e8TP$2MKJ- zUNdgTiDFU70})OZAq7CZMS3o5JIe4LGb;kg#a9rj=G%DGm=Z#g2tSBQrwB`a4M}mr z(&icWGyK&#Rgv=637)kNHQIG~b9oh$``fpt 
zeQWjfV5(-{J@a4Me_$8dpHCg=n&}P;eJ{-R4JZ4C0j3wVdv%WpB{$NG(>4*ba@xZL z!UuGRDX{Pe{U~VFJv8d9m@xXBLK0$}GLZRoxFX0fW-OTJV9|o%!s!^bqNg*LB2yhq zT70AUiAp?9a#6t)|F#-vGj?#6%c!DZ^D76JcbuFM{NS<_(Oq za5zsj*bDVD0@8gTbV+K?2;Ij;%cL#<5>Wf-@8Nker<#x)~xoy|t)40@o& zfFt+x61NC4uRUXmFcDF;kzWUNMfSCD_L>eJaX9%z8kB|@bV1p%4iyj7#WFz;Q*2bc zS{CX;M}|@TiWFde(m$p}=F9ka$O%2Z8`kSqp=52!vras*z4z4}pwh;0A~^3_-<3Y0 zcx}qHPGo8>e|6i0ej@bib}@lFe|+EgiLnH<5#epTSs^Uhlq%Vr^lcV=TLgFgm|@QC zy>{Wsh4JuY;na$0->ucN)w`0_yHeGAQtqb&%TtWM$*)CIM0;u}tjtAFQnr_Y?JIeq4IbhWtn!*lw@(<69EtNiwC&@BTSGREcRedt!^!?86U zwWx%-Ov=mn;fy$7yF<1y?4fkD@H45sD1Z4igyD=_2}QBZpdK<_egug!dfF9_lFuQ2 zf7bqy8H&XH{SYw{v`hQ4EUWx6u;5=q0n_-4{x%$v73WWEO44R0Z_M@~Sm71pUDsDm zH2z903nAHJ{+tL56Zx+Vj_IKvWyu?__*!hnT=sU&RNH$eZ=IYznX29Oo!HFsroXU3 zqiVLTBiYs=bcRxGJu~MbGY9%5>14%Z7Zf|)WCh1fnwyUdR%3(qVWGxfI@(B^E3w?k z{CE6s`KJoMv2?oi{<6K;=e*&1`vTN7dvEnlhf{S=rK%cIMSBH*qu^?~qMx%80QzRE zOC~qIQ~y@|)Ru4T5w^74uV|SoD4l5h`u;cePaaN{txpwfz&CE(HBlsL{v`ct?_%qg z_LNNQoT!`9-P-+~l~B<1zwf_SoN8!GZEsJlJ|WaRBe+iHu4MZB{fY)j*Z|F2vu*T6 z)@?XYe=U3^{Mz}^{r}mKpSI@-g=>@cwX%JbBlj-O*iS;waLk4c&gj8UKWWjzevBfq z#mT$8S8b$5lrKtbzgDnsgXaI-P_W;o|GrfVcOLaFlVZQaNHK*xQOnm7+9(X=RqG+SEXeT6aPs}HuE>h)N@&)qAXN^?(fY&%X&TQFbn91GN#|Tigb;}p1Q#K@^ zs&oEW@$xS+CSqtJ%RXf0ITU24Bj`X183P^)L3aWMUid+n~e%Nh^?9CuIs&q8OUEvq-)gFOj=S;{kRp z3}X&vToP|IC=$yCdhxGQF^pQXDXor@s7YFVvwA-MMLv#y6D9m7{6&8kj>r?em~vH( z?*Gu92Q#B-M?Tpan=q#wfzjqHV{ZnGy)47;V9HY^GyD*Ks}h-h>?nTVEWLSh^5i?6 zZ*@+Wrz-cP%AXP(4QVA*Sid(_*_bMC5**DBJcV3V3kSZ$B=Hm7N{84LEZiqX~|dJ2fjE1p;ZmZ+P! 
zazA$D!&uLO>jx%IrF>N>2TnfdoyBwR!tvOROV=+=no|C%lzY|3dW8O14h0*uI_0kU z*oaW*+-?!}6l)DzJ zN~`l@8&=h{qh!{xEa_M_*)Z8Vd1~tHv~SuZEU*8-vGw6njkoY&K$9n)ao?Tt*5Hi3 zs4SZF?VTK+>H}=0e0x*&y@Fw{D3K=^mLAep$C9gk^cV2VlgP`eDNNrMOCD4)x@9s~z z4+|E?wy(OmX~rB-MP_w5E()Q(nWpn8cSNv6(st+LV=S{qNxMZo#>#kdTGeu{$z`)k zA+V&y1q6)#iMixOMwLrVI^?cPKJ%f$N0fM=ytgRS-s%dn)WVv+dQRz z&wR@~?M$uOoeJy`JWu6L`_OK3x*q9)W*_Ehta*1Sa=i&7<68L?p?I!Ta;r=V+f!i?a%QEE z7HRck{Q;z)vO};)uE+K#$YpzCT{5dyYBh2>o|x+~njg7b2@_m*LI)QrBXGS51NJ&H z0+~1M^IAn($w9OeY&Wy(DMTeh#^oT&3j!93WM_Rut3Sr(3uO1%7NVDUt%kWxq-GiY z6}PuB@;JcAT~C%)mI>gvtsMmhSb-S>U0V@%un_5b3^NF75{_5fM1qh}inPoyk&U>X zh0!wp_sDskoJ|xAwyqyMh?+vdBbKXRwmAGlZXmsLNg5{`GK4xUbxo=N3DH)hEi#hFNeWD*R%v^SrQ zba;iltf&%=^MH>)Nq z-l=-4YHCetd3~y6tKiv|8}q5Kq_A#pYE@$@&?I=8)86ulxZtTtd;Jr>aaY>Abh1qF z)S--t1A?dWq1Q_I(hhuCMEJ7QMflPs;Y;+K{18Wj;Y(tJwfu6h}Oj} z#hwrJgkjgG4#&}qtelL!rGE&9>&QUW#18I34<&f|0n3Xz0`MG zkOha(aL6zJhn5nY>vgzBTR#27s&V+SYay;DY(DqkShd^64g~A0`LOkbh|i)PWPRCS z(TrJ_LtVIB|E6)dzFX`s(UrTG1UBmKW!OJ?c}pp-M~Oy4{igZvw>X^jMPBJruyw* z+X51cgUi9+#USzTlk**N*ytMhKZPF_HK@>Fk1EaIppzF0NXoN=E~bXDcagy$Rd5eQ z9VRGfL0*CaGeLpxN0wT_Qu`yzHUbCe0ZnY1>=)Q^l7)cLw>V%FlxyC!*6V-Au|fa) z#(I6XJeN?FjEL^y2Lxz1#-vEc=PkzSTu4g~hogd$Sc&#F~i&*ENAc;3tdJ66cFYA0}Y=e z91V6|fJBSX8~+yw!nAdWqa@n4r7Jp^b&&;!rofAd4n}Zi2^YE$2E!_aywB1Qwxh8m z$ncEsyKgQ@yNd)%(VW>emY8*~NV-={hNjwQ%p0Z?v-O9Q^@nHck0tAm&DNhu)}NWF z4?@`oA`sCW->kVZX|9|xSCQE}yGO*Dz>@MqEFOk6i4>sG%cUYCEdgXp7Nffg@S7)z zHF-zwQNxa#6mAG@EO>s-&>RqvyT(OBWrn}f5Twl!%IR-fupNLX6s6p>7l@l8?u2LH!uPW7Fv1` zZ#kPy881D73m}FF|HJ5KZ7oNan!&6lZo`2kdn3P#>iCeHzar;{uP@KpdkZ^@0N*O#We%fUPXAL5z&=F+s? zJAUHTJ!!H;QkBfF5*!3o4?JbFo)t;Y3OPJ2hsYi&3&@{YZ)MV3$NLJRXM;p-eKB ztyukg+y|W&E$sj4VG^J002(+8rzd-WFo$oz+jQAjZG$mY@s_`4cAZ#=@0M9o=qaj{ zO`m)4(DZ>7KlsE4Yn~>Ee$DwhvZGwXlyM8p9QwJktco>OcALQ6msl!2A~l3T!j z$%u~qq6y6&_DkL;lJ;BBF2_;JWF;W7k{6`?H+x`v5G(Efg?2N&3I^QOu8f7wNYNz; z8IO$2vUn@wR)w&;_-u4G8_bYiF#SF3%zThoBGKUBK;&}9+TGQC4oDBT-{^w_Lm3m! 
zJ<+Oa5#PZcSOsUs$7xANEUTHHBbc^b05fHfN8Wzizeja3C1Pw}{t1oe|8HtFOK23q z`fn-V@5uQta30&UFmo#Lq6H%?n?Jln6N0u(SWfda#mivl(3uXbBs+)Y>7^^^U6wAv z`>Hz~SSJP-FUL<8DHIs4TGE9jc-hlMWFpa-E?I(?TMA_66z%B}va0A#mjuQPzvLFd z8cmfS8Z!oA{~a#VPm!Y(&zGIKk>8hb${aB{kTBn_$U$>1l`^a-wJ;6J9A!xy4amM^ zIQ_z9(o+5TWy+8#FGE^|harWXMj0cz5pZ)LhxAHqrWu&aD7w^Qa8=~PAuO$Y{6i`M zCpS$JfdX|WV;^Dt1rO?=ZU2G?p=Aq#@Ka$;L&ip&pB!nz+7T<1sziQ`6S_TC&a@MG z$y#Mlmh=r&!3JbDCbmqf2})L3=;^DF!j!jS^qsTNhp5YgTACr3;#Tgj>(Pca3G3V1 zU)I8oqphz}sYCKfBLn*1)>kEkDsS{_wxvix*vTiwF}$ga3t=aDToM-ilII63KC1C8 z;Z3qx%O1sUVGNQ!@j**ANq*&xJEF6AS!xl=kq@=Rwn-^D)Rt?cq5o?~Or4ZYc_W6% zBRbl>;6V({S=zBJco5@yQ|5<}QJZRaYsP$FU+}=uBh5H&alE0mZQs%3N1Bhrd|X50 zfo5pqG8D%gqDAP|QQA>39GKD8@xv_eT$sbQH5dh2je9$@o6<9|>kSisTp(@;V8cYp zM=aZ}xM@uQXm%I>Wn``{=l?5S{6CUIsFeR}G@JiVa56TWmd%>xW?NC7b|Q?H<^P6! zL|X7ahLdsk;BwjE5G-WzUAWeW6Y(``_CHag=czbYRf^z*jUfcuhp(Yp{+{CKN>ctG z$RYgB6ISN`BYb3Ki5aW@ZxsAAf~)P~VnAwPY=gyrpM1&&j1};R^5SAw5b|u9afs{N z`HSqDS|j_#F0hgXppvNr19e#3w{X#*&~aKYh?M~f3L-b$3@O-`!*tzCZ@nZ`?ZxF; z4-mTh)!lOr*R_LJ4rXO|)l=>7J$LK58OP3f;llDQcMRWkecLtT*#Bc^@oUeG>E<>b zOKm)!v_Q3O?BaxVa@UW+Kr&9oW~+84t9GWmyT(j&h1FA))4tjD&B^u6cQ2$0TgU8a z-?CZX`lN3?-S~9m%8_*Wvf1(t$#OckTd?7gMqgkXvtobf#FcNefz8Rl=7)OSD%+R? 
zqDzN=V%AYHR=?C?Z+5;WhE6=O}mbo4>Z=4`;+l4X;tQ%mY*muyNd*)-jjTC(lV z^5l|TV@+3&rajf;)>%(=(o;RP;=S5iwMox`J3HZs2a;`9Y%m~}@&CFW^$l9;P+^d4?pQtt&5`0O9N-YMax3E7z${ z3BgK8i?nvh3ex;qp}DThRkQjo5!|#0LJ?Za+QnOeQ_A{W%B2aYO5~EmDtGW<+0MfX8TRf|#SpP9`h|BVWM-Bgxw3A@kD7`4posXt%`e` z&(b|%3`OF?=mFU2!~Tf~YzYfwt(^o}g$#S(=F!1OJg^WQEKKo-gc^PyB>^=+Nfn2j zFa_K`!5t7R0l0Ji(uwDOw_%E(YR1HzE#HzX-;yfddZ%@E=dt8YT$g)1xw9=<-Zooy zDp__aRo0pEpB{63X!ob{D}SqNDsM8B%wHoo)}%|TXG?37rM1(`gwonn>89xylBHY0 zKS&o;%@))q3u>pjk_GFar3a(;Yp$;WJ94xT17P}d zX9TevbD8xPXDH(2tq=Pe=E!j}doo8HaY+|Ea=yF;5%?*`*&mN1k|&ojaNJ*sOW!bi zvixbnEPXOjqm(1{GER{juEBO6yMAq%Vr7wTy!qn9s)R#uZCeFSUNbTp?aR>@pJ%{& zvqp;@aqj-*a2h+IX9Ob|R$r=oiR4R|`T~+)dDH!yM5;<-4{V~zbzVIexeFp1m)+gA zDg->?MsOUnaHQ3|Wd1zTHCO*JA~09Kd@jW1B7mh4905t2Ym?Gft@MKAOUMGrzSUBg z@|H@BZj9ARp-&)9b8K9)r0F`T1m(?0Q+a~{E|?{Ish@ttIAS0p=<~0^ygHq|1k)q3 z?jaTWXQ1%*jBY3fMF-lP(>1mtGKS(Ac!_n<8ETQ>(Q&h=8-d0Y$_TAR~S}mI5wt_A+jaBkERUkL`kWxh#!aQx`Z6YV>k| znWdJvZp5vldv5TX@zVle0N0781B)4nn=yhjF&O0;&B>T@|1Z8pq>dS#tWc|rcx7ye zGn9l8w2VW6ThxQSd>6;t@gW)SV=Gw3Dr>qEUoQrp3I9jbW0r7K{8(5djo`91+|ol5 zV^M0vI3Pq!LjmxgFmjMsKvEp#i5Sip+3Kb`E^QX=-(cNS`V9tfm{FWu+JA1AEXWOZ z4>QSDeCfhvZ$W}T!?#7LO{1C*-M%?*`Rhx@_NMa-$n@~U(;(B_Z@KRWHchV=$~NCQ z{@spmcZ_dM%Q)WHJh}*^u|lqUrPIm;7gY-eQ4Ks1CKN&uWiiu(52ba zr2X#sZ=e58|BUDGxBI_oo7PXCz@3nx8|SW{n}~h=#W!9|6|PQtSKsL$GbTNU(@rm) z+Maa=lFq;f&gJRCWw=}W{Ppw6!kWouIAi;9Jb4yJwTtVfF3zq!kX(Dd4U2^AzyIn%@x>WIjv4cM~Yg`qu*qc{2>s_AoE>C$^%zCSm z-l{3%bo(E4ezS9S^MT~%1A@0IA_orLhWwE?Ad|sfOp3uy&>NP z>yl8Mgk7M*G3U=76>Gdlv_En#7o5vKV+ZN~y?*8XJ*MxO8axM#`g=y>0hhTI#swHu zJ`ZpdH*d^Gm_XD*V6%g5=-5_JOB0*+n4iH?0v|{O$da;n8ifqxLKn(MS()F&lL{?Q z6q<-6n2*Ji4@jmI@t;|l)n$}+M$F6N%nOVzi?)^7M5Zap2gvE5l8=*fjGPBlGdUL# z8SO+6tSsvajmsXutg;br?3iFy*-SoI-OYb!C7(@GQ1sAFK8FTi<|Lm><6H93O+F8q zTK2;CXx%EK?NPhdYTWfn;Syt^STEH!Pj{KPzlAi!u4o*kkJ({xbyl+2v*Cv}>Lk9Vw z6_$PYRd}BC;nG@|^V3zDtcrq{q6dKeke!%gSnmvq6i7MAOX1AUtdY`*-Q}!2RoP$3 z)m9#;JW|<~(c@C~xaXWGI;bUqAsFBh9#I))>%hI(or#a 
zH09Vtn%C}e<7+#F;?+s}>gj@M^Yjbf@!u^Y1C=A<4brXXzXD$clROJFHf$UFKm|HZLi*u`Tfi^*1YG$S> z*KVEjC`x;r_jaB%>DXRHW^s#sMXm(DfG%Ha{Q35agSA7@lnF^2O@-2dZ5R={w#y_B zczWGkk$@;HiPjAaT^4ms)&yebLIZ)}A!1h1&4SW>4RzRi6S>LLl|nELgX=$_#Y2Wv zm>LT=4*?Mrup_gg_AtgZJH`l?A5Pl~>Hcqt#`p+)@vv{rmDb2pe zVqgn7usHyl>t*C7v5C6-IrZw|rFc=VY;cI#=#E#4HfV{BtDLxbEZM&hqMSmXf*oVA z&svILV}GLf{+bq{?O8BOW-O=VV4=8rdJ_xVstQ{@-Oj?ctHN@ntDSz4rE65B+bHa6 zWr0Ukf!l<}4i?y{3fwB}rRIEKIVB=v_2iMK;lIPT{Hpc_HbhszR8S$kMJA@A3a7AC z7M*k|gNuwU@uaD2QjDWvA{ytRId$vN_4Gia*dY3F_q@D?{sQ?_MhX>U6JH>oRjCgs z;K}W?|D5R^N_qw1e}Q@kiO3ADMMuN^g;IIg0+U@*7cDFQ3#Ia56rbF#f-h87;rvuU zfqocr08S&-g4!_0uS1&&cR`twkJV-kvC?$ZBD$j-U6*S4!Hc*Csusi*ShlsI zR0~~; zxqc6BRR- zB{Ke+D4SWkhvBrRu$+!BUVAea?a>N~WG+g>F(37EkSCK^xUU;E7{_ zP%j3x(>Q8{Fa^m^lusL2&}JoP$rMg*A=NfHh?bT$2>ZaY=X3CT1QX!@AAV;?;hu!C zVLfC4VPbCmEWfr^q!LwgiT^D$gx^L0w2PeW>rARh}9@j zT{x$qCa+-$dUznd8LT>yZl6zxi&xvJ8xW4lW!CY{)VIGS^*wkRG>7AUP&lj?>l(83 z(J?Z<&#Sm2X4;FOKKgZdAVP*nm?_bgU|UM@3m))c9BkEI5KccyoGaQBg5f1?E_G=+H@b7- zg9EojHL?ds_+)P4kfh82LXgkQQK zi;~XgTI7plIvtPqIGy72@ z!-46uS(lRSY&cYsD~6QMQ##tghXBvY0B_)z6N7O!^A%WKiN|Qb1{!YhR9pCcb@!g5f z9O_K1Kb>-)flVc|{j2dW$Hz}FjrRQUKCq-A`i7p&=%L3KKY7ZK?i39Z?4dKB9H+~^ z8IP_9R@wo!ac6(mK)44i3wE44ZX@(Ap6rg>Rd`jD;b{3&_!{*#(j{eQrYWNQ8N_d< zuU)~u_JPGW{!-GiGL3g4X{n(1NYb(bRscxzkDC(d}FG7!~jDwNr zL6JnqVTmxVoE0JKAV7@{JY-ke`QYV~!-K3?cI5&eLiM|-yAPpq23%hq`SOThE@Km& z?ySfCFLR#!@rDV*Y*BTxs2UvOBe#wui?-c)Ch3Q<*rw5zIlF5tCf;yAc?ze+1jAYp ztO%5(hXns@RQ?e7L!>JrL&BFq125!1ogmrLPHO z&xt~-PdR~(RN}exHMr=me0iXuP2nr5`8O0@x*(eE+5-EIA3cJ|5?q#dya`toVGd*(*XRi3=8mQW%+hB9xj$CM=XD z417bvoSOl!TI-$!v4%IHo-!{zt)~kf71ZJyjb3@qFq#HPeBnc;Mf-BFz!hC)z`TW7 zGqN(1nl=XivML1NABGqR{;RDS9oPt%xj5xa;P5&*WY>oOU2@(dhngoE?$9#0T@2>q zzFYtwH)p39e;c(xlOQ_*cIB7=pz`H2;4~rD1iEp1GgC<;2B>O|@V#h06yN0Ofm^IE zLHmK@&(VYllS3<^c(IHn7yM^D;eqbRFc$)x$GT8{mog(Vu4pKNlM~Wyqd8RC^29kJ zo!(}XPhV!Mg_2t5%#|@D=UTruQAXd-C1^;Y6pVL6qlQ2(m30apP{(Y(CztGT=ZmC`-cqZg#O7@eJg{GO;|V z$iyhcC!AnKW?PB7F`Of=geR&S*AA>p*c3Al3E9+G93?=(-vbkGe-EEmuDeimaKei= 
z%L@8Z|0o2=K!NI}3Q9}UbHO7KAS1XpB5J}dDHc|<{p!wGxs)`KCn1)R{Ftmimhkp1 zlTs*esPto&(<8k{d{4|HFQ{^!0OUAX%264Zz@U4U-6|-M z`w-!tgbl7YVTYTSaKO!fQ*IRyJ4|Qv)jZwV2~EP|LNh{pVF*%k>#4JP1b}6OuM$GbDV}bCBgbnVBg#C4G!V%jgE1$ZRIadX6}%UxiR7AK*;5smLqk zQVtZMtjDkoXHsJuIT3OQBZ03b`(;ao%r*t1AIO+s-1h&KcJ0A!U1eU^maO;FdRczSj^nqTI&t21 zTsMgmr?!(OP9I5}G_}$+X?GPUa$2 z%NRgPl><4W9gyS6iDflVy)DI=3xq9`vs|`GXI}BirIOL&)vriT5M{AkuD(;VlE?&} z1cG^zSzWeC%@&V@HtJ?{g7{=t8b(b8L#Nn7u$b;r-bhVe4p7Jmd0c=$N&?TzZUsU` zN05l;xHd?H6D592k|kSq$_S%`yf)SFM9H(0J42QDlT>-u0;2VKqGkBsQ4~Tj_;nag zrXXwCB^?Xsz{~D{_#fikvPYhJkRFg9BIx~#`VA{>+vf(-ZT%^I^W@<4*7&&ly4gd} z@okywOPB7vyeq4>*h4FUro@`WMDobIUpR7ZG4RB+J`-L)ZNKJ)HQ{${Zs0^;E{^Z| zk~w9nnXzR($%&=SL#fR}Y2SY6 zceJ~&S#;n>byd!~Q?A-+J6UlFZAwn0LtRt%u2ifM)v*b!JJWT$(iOX>23IOkQBvFn zUQJOy=F|{brP=oRZEv(C2j`D2bRG~6j|!(BUphUJIz6#)cp~Ep&-MZy7hGXvpRom( zY}F}S^#`G_q!-TnwaRp8-AYBRs(dI6ynvHYi8?~YP(al-B}-n_;u!ngnp^9Kh@Zu4NwSlHPw?4-kwy$o@^O~ zgeYeTtIC#BNQGL@N_wtB3`~}Rbg(H~O#xw6nQRUDYjx&eyfV9n{B^p@wv{yv3440Y zrc7lO{qu)19xyD2uXiBA5-u9Ay-i*UvtgtJ#sf76x(E;(>D6N|isx zdfM8XR9r^u(KJ$x8ki84cjC^n&GX=sP;f5OxGWy;zny*fdAK7SrT zqDJEtdRZbeMX)abQ4$@vU9FT*J$t>7ar#}l|}TK#?bBBC#0f{p(Q zWinKwy=(QyBxCe08Ou|~@TqqL8 zwvaFTj@*0aylK-$?qxfd0h9+~Y=!~~6&H0?q)qs?rgeh;DAg(@$< zRrYLERqt8@SITq;;z7*l><^cbly}~rs$x!JE?2-nT9`58?-*w!MSTS1$hZiz1&U>EQ!9F5nspyxu5Fa- zK`RZTUjVa^5lY~D-vYj8Q zc8;-ITy(~q5kJjn*c3MFVQ2NCVh{m{BrLd6e&sq1u3?QRc8GbrKdWFFYAKr8Xh)B+ z)v5Mf%#NJ~Ynv~M_+X1mS<@e(PYbW#V9bvBQ^rg!D7zy}Ensl}n$l<|Ahn9rN3cU1 z8seX4tw6h@orBW(N?{a=$R}rfo<~nX)f@vT#9Q#N$j}HzE!yHl2WHlE{O8G`Whlxr zK0}YpZUQj~L_H%!8aze_3&0fy6|mzjZr~W}0SPiPkxnMdsVZ8E7V$0MhcrL&#L1@^ z-*~iqw*&#K5=vlV{o!a;I5Ku>oKYdnmaPJ^NbifDZGnTEncG&QIk*TFd1R4p_igg^Usgz+a#i zM5&H;@fu&w0rVDSB?n9K$LX7ye^L*NM|B4)LOrV|P|M#FEtP`@*QjRSOI4HiKy_QZ zEbUkW)3a{x^aHO{KnHKIJihlU1FsDTO7=!U`3&ypi@2zZq z+HpWI9k>ccpZi|;M8;Vz^7$+}VJy}0@}A3krUzzgzuEXkV`9^s=U+lU3C%X9Yj-W$ zc4sOZrcBe|;tyqNYT>I$fe*E@Y04zH>r$q*E6p9>VGlTBHmrb8wJiZg%=OR~0tjGB 
zd>~cYBzT&IW)N20i>4hJXJE?tffE*Y%i_(Xu1=IWl}w1y@w#WGN0H{tz}7~w~LhEo2Qa{{(kTWgX#8rgtnc+&XdCE6T;X9;gP6t@;Sj3 z)6j<|9VBb(XX;;Vn`v9}w4^*OINWF4J}R(zra4ne%7;D8utibYn$sCdEmKCEiP0Oq z#B){IsPrI6Th@cO>9!Btej(79XomFY*bm0uareP;hr>PH96vQ1`R3RgW2vf*X~!nm zSPPPNno%LxJY~-W!%M-tQo+04zL@S9dHdr0{mJ?{{}0=*TugP0q=Uy1qq2F*IBl37 z6-*&g54}0QXOWELmXgXEsFGHZ9uDX9P~GI-sRsc!t<<-D?abs}2)Cy6Yw`}(f4W|&TYsEm ztOi#$+3xjNR-K1QjEa%j!&r7&GMhe5^bJw2m<;NbqWlqqs+HUDmayCJVpY`!a?@*b z!$-)VA;}eD8sR0YQW^77nKV2?AYt>~YBjM-M5|nX#J#!LOr_h&4g>5bD3z}P~J`Q$O78q6o_)3yzQwKKVEaclpT`t;WO zge?aV(!=sSieBTAI6l)pP|+aXwc=AL=$^p&&ezK4X}M-b&G7gz~Y#h-cQ6N0~C z_UWwEU~UF#D091DZ76>bB^1}nUba@4B{hvv-CI_ShW|uaG5W}DlpK8$7vHzqs$$$* z%F$J+iG=514WTCDA4(V1k4PbyiCP;7AkOpVwOKQNxrin2t#wt-`D?a8O$tu|-v zI&-pJ!S%X()%tJ`iLgI$sjQOTq*FRhDqN0-M=^bcTUG|pail~sva0*#Y|->p+O0I< zTGJcaO{e&Eqhv;_17@`JWJasW5G`tKh!$fB(&}u87Bw^tfi9(m#g-WE;hNCd{C8;~ zeUBVkQsY<^j3{Arh-hf5z0evO7g-fm(Lcr5)_z)8{m6zH+XCxVc?rzPx84PvW>bCB zeervt4bAEPLLlycrCM;+%$6>$-I#ndy|!DZ+c7u#R>PH+^gZ_ry$=WvJhW&#@|%`* ze`1w-VdG|w|2C=^bu)e;@ZCUBw00{B(AFcex3w$RZSuy%(@{#Krr#tV?Ml%yO$rTP z9HpUqqNuY$WIJlSzjxs9?nB||&L(2r6Qec~^Pi?1O$JTUJ}1_(oo#ng#m+=l?Rej( z;b7Yy+wmlhJ4TegM6Za?j{iRD0+pzp_|xZZVZ1B9!wB&|Lh3#m6Qai_b-4I| zVJnk^nCl8`d2ZjUWiw^hlu^r>F@7LoJSCI+0P`D6e|AU2uC(#5*aMNg_rvUa!A!7r zI)>PF^)P>9Zump7N#6CeT)eal8HMm~=mIT;pzLWSDrKND8?9n8QdIF=G-#(zMs5 zVcb&Up#^SghZYR_v%RdaDBS{gLQ5)2-i4C2aA&E!i2`V{Kmk5i2$&o%)6~^_+p+Pf zAR_wPCyoYSKa`7n{qnQ=EB7YApWg$Rv8v!CQdkS8Sq1*sz}9BhlmzHT?tA13rVfBY z#u)pK0roMAzO&_O4P;-$FZTl=pprr=##ENHLaM{$H8bwh^i&|`z!(MrHfXb}g8b!fsjE(7OtxD zVJ2djk?=7w3jYG7F9fYg94NPk)}X&%6b*#dT6Raq1SEh&u{*Z%yjwD$X;51W-lX%V z+8OE;ZBe?5UO-Bb@GwQKnfQ&R3pZ*{)DXr;zMJtYMH`w(hncMGOorFOkDNlmrzBM$ z0PxzylgW!{AfJcw0XGwyhuRK<-{ha^+kRWj>>~AZn4&Q3>kE`2p<2ssF%ylb2*bD_ z6+KRh+W8>GEF))zqL)mJkya)~28gFD6?3ac_5i}z1t2XccEwi+4ddUS{Ju)g*U6#r z)8Ji_I!_vBvVR&m^)db%6obghJHcEschO1d#+3g@BzTx;mEXXFgBf%-`EQ4G87)&= z=vGh&wxq4C3)YS#mxi$p2(I;z0Uk#nZeI$vrh=_PpiOA&1J?9%>~c(~SeJCCof`y4 zXRo+&3Y!iHw)?VXuvXtNnhnIx0HgM&FQ1-1ml&No 
zop!7TYKJClela#3T`<7w z_*d1Hc60-8+PWPEynLbf#@RLR`0A&2XM7d$!!yUH`XtYiuQ}ywPHav4HX;mQ+4h<3 z@n_Pd^-HDAsnX`f($*D!IOFe~UppTF1=r7k%L3dVufdkO(6}Gt;k%ENi6)f6GSV7FXb^eAM;QFYW-I)j`w-L&y zLA2_d5=T%ZJVV@#^&peu8)i+3id1;hoJr_Cl-e@9s6U*kttULI zDy6T!6S#G&I38xbNUh!j&+VipC8<`N#UqgVU`kpMJOA%AE;UHfl?#YzifajBr z6!@X_u9eNRigKe&pp^e7ROH*V4k?x>DW=F2;%*%J0oMv9nC*3udhMw$K_3v?qQx6b zNXjVhU%>OK4m6AwQByD{P%wu;!RR$qIH{DPp1|KKYv;JK#WgRkm1PbLW{m&>0T)f6 zK^2?yur{cEQ>EOVd5sfG*GomMvSEpbv_(19ejVlyliJOh?T0f)d)} zR&`dWEDoT$4RSuJvN+76OPDW3nt6cPqRgXW4PK;BlMIT1B?}$Bf8@FWeU`lA4oy2Cu!4D|%zv~-VY*m#{?wl2NU=BOJ=C8`XIsoyx;|omqJ+7` zBc&i#n6H<~raI{uT)SFL6_JUu#xX`Wi4;Y&x?iCk)rpNc$ryz%YzvFCJ|L4Z%8jU~ zo&lUh#4Rh{_^C@k;Ykv?kmw7~F8si*q|zO93o53bB3~ZQ5$z=MoEK-jhi1G)2gy_Z zUy<^sH08gI=$P_WC$t4DSS#3t*WGDHuVCuE%@XM7nYX1o22-Z?Df{$=+18A+LX>cY zZSurJOKsh$w(iBYp843)o+GI}M;7-S!<~^(y<;vm_oPrcnDz__HVwPdiYX8;jt|dn zNsP?(&&L)`!QN9O$@l(v+-KZF(lCf?)}E{Y;tohVWZYGR4A!l6!1;>&@uEa}65D+QPt zDVUPUXz_N9EC$Mum+u|YiRN2+ZeOB$a4vK>kC@JAxP?@;UQB~FbYjPfRg+Dt>ZXV> zQ{7bQDKHckY~Si74`xB&uRlh|@&7>1-;whZa`w>t$A8G#l5ZcJ;iylmBSM<*SdK>! 
zt;xxhdhoxcB>zQDpd zpUb%XuR3NNuxIu%bO&dA|cuoA7*@;CH z7Nm>x8qlkcK{3UO2{vqJVHOHE#>$KH3&sjut(Pt%CObOZ47PwNu~c7r@@5SSBIyuv{!_nB0V0LX(Jg)5Qx@I-y9J6h6$EcbyrjO0V;0OhHMvOG16Ck8F(D{o~Im3wE#1!ql~{-4)c-w9iFr^LG@@{sd9LyzRwNaER0_KWuZP z?<1RtteUl``l^ku&>&FEsD)LK<|55PMgix|7`5Hfu1a&W=_HIbo_O>;6djy8ae^mu zihafTAJej>D`wGB*cZt+NRAONmaU|pU=%hFAZ5qn^Q;q9ViR>p79l3cwm2I#z=Ri` z0({9fFQ{STA@BJJZ>AJ_DuHN3{{P4!Zhn!qkAxWbtMu4IzPrikCFi>o7NBJROFlFC zO31mFoa5wNqImbw+oR+}$a$2UC&)QZ&Zo#>@_7s+eT^OyS(PSMzIK z!~446`#S&oy2kf)tw7cc#!LI2-*;)~`5~dSJ*{7NT~}mi$QaF}L+aAUpZ~Z3Ei1G8LeK?3$-)0_s5+xBxivp$!Tg+8jgFjAUtUDD zuwFCk%dN->nLAyR){HNR442E+3p)SxqJXbR(7CVQRq4mCC)=efr$Tcj9Je-S;=oEn z?)IF?!1;47BiEkua@>}jNzaw%9C}z>$T>`$E9WnUV=LiGavmeM9uj9o);|`RxL{TX zCs&NeEIsD@L9XP+CXO@5D{koU^GDMrZWnh=M{bVijGQ~?HFNe{(15I525ykcxy;!;B`42S znGV;W{u2iqP)z^iy0BfTA3Glcyo5<-Bn!5b+J6~%d)_)lmq$149F*@Cm)ix z xOeC9wE6dedxIQiy@^jtU^*X(6D!Ncy_O8V{GyKZ^X-kEmugDhb^dYvi{~vRCEfN3# diff --git a/scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc b/scripts/__pycache__/backfill_harrier_embeddings.cpython-314.pyc deleted file mode 100644 index f17d278138a305c5ce1009c639b494192afadceb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 72656 zcmce<3tSvYdMB9menM68eu|P1LP!Dx=>0~31VTb|fC#%{Ep-Dd9B-eK&vM{SQi>e=0&6BX*JNptS@dDeIK?s<{b<8I5| z^Zmcb%B%zxpq@EjN<>CQMnpzNeDTHijM$u>X6JC-d;Hh^o8IQQzoH+?Ws)Ph$DbNG zZj`&g@wyhSSr^uoP+Z@l59`_85H_&8F>GXaQ`p4r=CGOFEny40TfEU!1H@CQ&-C;M2TUtEL8R1M8w<4Yu&Mx83o7%T1O-D4lw9&0R<2_p(O5bo?c+CYvxP;FLd$)4@FAwU%YpXc^e>tfOufy-J z;CJZ-U3fj8nalB6x!iy|T*hZ3p2OlB_*}&ESiGFiN4$W=H}Zvu7qNH+UyS$~7T?5| zAns-H&HP%#*Rl8(z7+BGEMCc%A-;jdtN3!nH?nv&UxD~07TTDYoW<{RY9F-n=kfajdw!hvA^rl3pWu5CzsTZkd@tgD z7C*@c5WmFY?R+2Nms$Kdz8~=yS)AvuAU?q29sD5TLoD9O49`P@*_&NTIh>x=PdH$CW{}PK| zIPc@Xyz4Z_`O|6ko__)J^uk(BfgWX+T-NdWQm*2we?tIMp_3dvLqo&io^b#0P^b;R zZT_BcPwzlaC{&@BtoRWK_YVdnD}|Bap#ZWN+k3*7aW~WtUB}H75QJd?5sMIbX(SK| 
zOZm+$T}>xi+8TY$^*rCw#QRR1Y-~F2YiX478=IQikJq<0J@qJ3JlNkW42OmKS35Vz<>!b~#UZFo24pm?5>AiBPe_+6u=mB3~@M6I4?;q+5RRyn0PM8O`x>I zf~igG;+!1M)#??XD#0R(Z`VsYds~G8-+VnJnL}YeAVV@;6Z*pd8zBQVuBVCHDMXQKB*tS@PTo?Kwr6NV5qTdV2Th~ZeXgZ zVFtm2EJ79qnH1z8K$CKDlZSvciS;N8dguXVlDqW<9rff3y75f;F1H1b)l2Qt-$v4E zUAmS#NBP;+ZdPK+yV9-gwq%+T1FfOknE=gp&3dOiSLS=FvIM3IPDHO51?oWnqeGr~ zf!^UEf5g0b>()J6w@SL_E6f6oy<`{~z9vvJCBsO#m!>)9UT;9M^<%O!FlPP9Iz=8@ z3rhI#RH#5H)|Yz`8Rh03nK$>ny6=wl4d?65sG}_AsGN0F&a4$3l~KpOnB&N-<4Dxe z9CNhKI@-nO&WL9(L>)fS=zDtb)eiTvnpd8ccTEfcB-7QNfsueWSoZ-4=|WCFVB58l-YL_}(5o27SnFxb-@7HaW;eNhQ(*nH83$&v5$ z#)@hm6xEJxzq$9u-mmQ!jeBPF2$G+rG++HJFDt!rw+ow*Y1O&bk4$R!nshmjI0WOR z3v`<(WxtZ`KwfsK?dZhMXYO8xvb)fIUEC!WC$x1AG5PA^l$l4!tWBswa)n+X+FM2& zHHc|D)!Y;Ep~HpkNN1k`(iW|mw|K_O?qtR+#SbjSaaY!uGj4Hy>GMBvdFCDIVR?=C<(1CuLgyAPf_$ZW zgZfS|S-ZZetfHM*IXjWttC<^S&W|1;W~Ng&qA$jv^>cd-#azGcuC5KEvK#M`>|HdW zn-~FNU@$N2qI7exM+gP{AsX5Z8@w`B%3z7U;Bcrv+K1uiY_lD16xVLyg zfSq4;e!YT{>ip8_b`iqbHQq?*YuCgD_A3dD$jdGj7~Al3_4O<@%HlO^l``}+v!0kn z!j~#{-PK9fz)&k{L$`*e`o@Q%IXj|`ouYAP@)Lia`DSMG%_7e?Y7QF@$r9|j zPHhqDDT&RqY>KTm*}jNO>SQzO3LkL@CNLzpO&wsj89z&Wda}{t#n6NmA3{`5I}(;& z?F9HsE{q0^M`GfZEVIm@J{tQa7q8a}C6uuY>bvx*=!|nr`a)H-({xyJt>wCO7xZY0 zVP~S1yir>_(3@G)N`1R)Nd@?s#MB@?Er2O&=UqJwhY%PFhX7VD_lLs1;VThiRd_H6 z!qnV91P!y1&yG&&N9dpj-Z@m@{bG#Ljo<~pfeBQCbUwz6)*PD)Lo2(JaTygUSCgmm?*C^$j%_* z5+cAsM(bBwf2lQQ%$_x7Pxz-gv0BU-YvZoWn5$&gRWfOO%kidT>e55+wy10SXmi}? zj2Uxhjk$M>Z`gm`KDl$c=ALmbuj#(=Vcv1E`+{i9oiqAaE+VPNJ3lZM{Hx3cqcD6e z^dxHlU4M2ljynGt=Ehgh2_foDBE5~eCXn6~j{9j^H0i{3JLJgBHu=HRGlZaz&5$+! 
zDl>p9QaW zc_iJn8^{f!WrYsPZyXK=h6ExZ*(?yc5d%YUh!V~s zkj$5RhWrEIe+ejzNWn6kSce_SDKH@T@rVt1FCxnWzb)-%{*Cwi z_2-$nV-0a<#?9k5j^D|8qu|#ICL5*$bNTyb!VmKsK5#ZZGH_XWpBOm%CKSQ{2eyhQ zA>x_7zpH+)>Ak&pK2*|BE_ah3%I~V9CuBl}| zlXDdL*rlR=I|J-0E~o>#Mttc-h@RK42%>s`=wSw;hE$8iwnUWxVS@@tm{dhvYcbHT zE)|Dz&aAYfT?KF@MFwjHc$GVVxG@3521rm0NgyWLab+MTdNBdSMoA|m!MCl#B=q3D z!d3*5xhDun^~+Ft6e(IL6^LM!j062c0U1WU3XIwr{XtlIKzI?E*nnL|1R%P$bhI&^ z5+Jh*#v0?9xwrS++VhQlV~rnX=KjQ;7k9dEw%%w>VCQYoyp10?D<0Xn>_P^b3r^1I z9&P<34Pg3#ts?PLCM18e-d0~@dauTDSpV|@s-6hRyT$||In*;KvxzdKu12Ly;B!^N z(Lg|ofMiR!O@J4`?P(Y_q7XyA7g%S}Y>=;1vL%SRX(>;TBr2S_3C_-`awi^8zDMuW z$eS)Bg$T(=&e{6P=X)w6zVnI5*2>+!^P3`hEkj`S`~BXI=Z<^(Y31*~1cBlnFX&9Y z0MW&zk%0lo-+C_#`_Me_%Y(QrqA^1+4ft3|LBJ!{7lemNh>=D@3^A`}dRxq~>w#m} ze0rW(SU27H-Cbh-zG(V>(YinGaE-PmdqFR7O=QO-^K>8=)0tuu`HOjeIHPb0cq3 z%j0cJujG)QBYeuP3%Uz7rHq0$E;ueYs}@nDDNCkb@GdQ>T}dx|u}Y`??dDElC9md2 z^hW@CFL`+LMSUyUL2K-YKB!aPUW#8OETx~Tut>Ud5&gMylCC_WFE5vL9FrQfVeAN; zC0khN843+Rj813M}-q~;xmb8 z-$GMVFt_NiW+Iq4OOlG(sSft*X)sq;&?l@vo>rHHW-p`_ayF+)#Nf9C8H{T6S;aemJpHX7T`p06-UCKeZH?URfU z{R&{z0kGh8zCcMP@Lb^#gJzl0QC3hl!LVG959*wr>%tiFvr)UkIzOJ9Kic%?p4>50 z+~I!hz{II<2d0|7H859PI~{m;nnzdfcESo$lI%-5?jSL2Or+OuDkM6O& zqMzr&I-Y~x|GQzsdf>S8Mqa-GnOV%B#*Avrq{hr@%%aAuYRtxB_OOX}V7YQi=7!;+ zOZ|NbQEhnmN?=Gb4-0)*Xe2A$X@!<-BcXsGN1fM(g)1S*Z6U4lOBSfUd?UhuK{>_L_Q0tP_$13spB)iWyIP4!8fYyj~NH2}_4A7hK%jNT3 z>K8&`B7ug6yE4Aq7SG6z8nfeR?k^utM6+_iy~;IF`9*kza3WikiV&ey zDV6H&DGd*6%WA~UAE3q%AsV8<^JtYsFy6&(PXmloX~^?f8devFDbzF1X-yJ%o%UCd ztCs0^mCnIC5|>h*mJqu98wsaCmTH!NL$G31V#zxgM=q60h!|EyWu#z@pza)~Ect5B zP-4kD(f#ct%UW{r7NrJ_(Fw%-B&S`bgf&G8ypfgArF@5K*b+1@Q)eopm6coJ_{^c1CXcQS+-0n zbC^wXnUzO1WeZdT8qJ6%TEKgj>HiGnD>Y@H1X%$JGE6T*yyRR_TS*^Q|C738fbbm+6I~6lGR^OL2-4 z%3Id(B|8(?hIn!f4kE0$Leb{sJy9De_79)gZhK|ex0J( zsqjW@s%=Na-75qb>((<6s$$aq2E2_)Ya{L>J^cd#zc)PWWm+BY6a5}}UmzwD$f0T?;TxzaFf(dJJi$BtKq8|(EP@lIkVQ%8pw#dtm40&Hr zEK$4z!+rfjFL;LqZzvGFIh&0u0Ulz zBxS4Rx=-@09qsiEP1L|_wSkM09V&Iu0^-0QxYRQ;5SH9(9rf)kzEk{h$*mSwQXm!S 
zKLtWU(_i(6k2h)ii~Tmsgoc!>*4fe2(b00Ut;5&JGUTdNFL_{9h>{8`>viEZ8aooB z2){zXO$x>-xJAL&5mcnfS{38vVf2xi$?!u-!Hv+2Jd*2J$H}%vm@4>XwV!0~xE=}z z2Af{)4@>&tkYquB_Y2S@8TtZYMv~C@9;rWdywjKXe9%UA8p#lo&r2p2mkfcIdl3v> zO}3ZuJCF&fVj&09Ji$(asJK5wdl(r}13CqA0Z;br{IBPKqj2Jthn`Jh<-v!ZgQLgd zMQcWz;ucTLk~eF~i&+XDSPGz9w6*Exv+`~q`}(noNUWr8wxn*RTP&%ImNZ4Pj*K;Z zY~>(j*mPslCeyon8GMlT+>o{;dD~e)04R(Y78rWiAj_8&et0u8L)&f4ZkjzgPa8z_13Sk=Cj3Tk5dfGnqxtZFT-kDQkaYHnt@qVf3Jn>IZ6eGtISOWrl zHnbez-an9iY?I-~nHk5*3_mV2A>NBe>XqODb&rXp9fhe>f?0`&4ZJQ4Rt2nuLCR2Y z)N?R!xz04=2}Zyqd;+*9ZF-QT{5y zy_Hp-n(~LpSkI#ape?~t!dsvRu~k#Wc3E&n*j_f=V2FlcAi_J6i>w)kN5Z}kvv~>$ zzf3u^C?Fb*^~*BmVN#x^$yHi=y#E~~Lq`x0N8vP8Kce6xSH^+~_XRVTk-cD{n3eNn zE!Ze#=W+`c929eM1tkk<6iesq>5p89$xMh!@S|SpaQxXGUt}-ob`Rk$U(hophC@lA zds1-G#SIZJ#jftHLu%gcLqkS2y?sb!MHF+sYSQ|!Mbiu>5tcJNjv%v#)=aw_Xd5VYvY1vi&d_?kVQyXN*6Zhc|0 zH=4U?%=nr!o>TOiBVM#|&YB-DsrtZLB=;6gmBe)6=)3Tp&uX?Qi^vTSTF4N?fZ-hK zSP2+}KS757hQAPL!f4#fO^r}kQaF*h!0zJO?Q(+LM0FTYAOYmI$M2xySLZ4rE7w)4 zvYb>J7>H*RpARKuN<}{CFnW@Ld&Cn%tuY8YR^EghcC>HFMG|;Nezz$fF@oBa!Y$x>QXE4lPKlrazM=O9MM;im>UMfLFc(r<=yAXlvww7 zco*+h%rlghx9MAQAY`MF!bx(W~JP@{YtFU^mMCvm6uU!ysTSU$9Mc!WDQ?J z;O^|_z<-cTja}^z?|C+)gefp2$n%`21Rf(sMzRwoa8_EtE`f zc@VBM?Q_Igtukbx>PT8O6VHaKSq>?!ek6Q(Sm=*1UyF$4Z~(?if;ZxXOgIb$Wo75} zU?Ad#x_Y3$m*uG@o;Q+ty0X5P>=g%whl7}>Z6zk&YS)7HNQFw_=F*XG*ejbzEc6EE}jTnf`S8LDcaRtQC$+T0Z-NmCKLl??v^ zGC9F`L83BbCNYATJ7$R{b+1qF?Sxo zt+TGJ)45UC?lEH=EN;=QqVe>JfvIh=&HH9I?-MuepE)n4w~iS;au!Zpn+(4dc{4IO zAQsofodsf1RouCDa@$*b-`u-kF=pDwEFg6^ZW*)B=N8>LJ5}?)`aa0596LIn;f-Z% ze2}qmD)U~h_+0l~#_6$!1$}tV!Kbw?miLAZhM&BCh}eLYu3H- z;;+9rmGyT1-TdDvj8*KNt=KzrG+NOZUDGsv6t)#PCv_hctbJqi>zgP0qXk=I1v|uo z9n*)$EksSea_g1J9iS#-xm(5Dty34rjX!BQ5zTFTKkHuU9pjyj$y}Hc74Q2XZ~yyQ zv$<^#+-;9sTwc}3dpOtHZ)Z+EA1kXD%j%~OMAsgUrZrPhORX4r0&!C z%#v7U#cXE9$DB27Q`}qnmi0~Rx9l-*&8)X(y8K>+=&gx*kB_I#7gxt~DyMqizI^xc z)brEAOeIYB_M8xRw~1LN9~<;*GRE!TAF^_9@4B^Xu4MaM=JuKLAGXfzIhhb4%$3wk zKR09f-RB>c9Gc6j|BI}mc#cE=Jl(IfsRdmTp{jz2Z0A^xYC*{!*z 
zKizoP(Y()ee~+HReK|;aKT}6xR=wlcM$?Zs=qcRjMADD9(}N%HC~D0x{ih5Q;;;lt zvLg&dFf2p(2KJt*_+kkHEm>Bic#tI~xA0>W5@`OUWS#$r9#AlkzYr0gU*W*Zt=s(x zadn>*CHVrGo2IOb7+zYr8M{J6&2p+P@-idSKw@>PjbLWnj%_P_YEaKPn=+Y)l9#wQ4G#8SpOIvf|6=0DJAj))I zX2$54K#G`^B^-)!=p~uOBvjxaB#lbDKp22nBEq_uYduJ=9ExFCB(45o>FC;_CJ-ng9B0!4837=B% zj|e1_thRN?3$v`~HTeAlk~y(dLz@8f&G2`^b0J}kNh)<^IVVP!4T=xBfw!I6<1a>? zYoSsvT03ja8*|Ki3MTeOJ>~FTfV|N|a>SM!EfZ;zfyr)hOY{Al`&O}?7dyH{zFW*W z{ekn$V+&Wj?o-Gf-Rnq<<&m6j5>|Phum$Cff`j@WIO@|)@1^Mw{@l=OB=}{}Yn8<` ztb1re$bCesr0_o}U_AJ00vYD7S3=NP!8+m-&ZbLyM8Tv3(m7q)3>b0T?!y8KH?gCe zJzN1eMOKdn@RD4$L6fRlWCV@LRN~PY05V-iN)Xj2ji?mR44N1~YbbU!fnB_z3qZ_6 z4~h_aNQCBERR=2@ziV|eT2Lm#comeblCj_avQN_Ce`Tbofp4nsZ1TcKx9Rj#aBN=Z zJ=q3$&R^#})zQ*+)ay^^KOnX72VVAWBv7v)8z%vcWQ3#!-pv|#XT$i!}|(q(8BqATF&?@(X`UhBLwN4lba@srkpzRd96dL6<=vw#o^ z!{J`m)MbU12~S}ecSBJE@*cFC@C2n~m}gUfq#DZzAsNhXCYUc|Z~6qjau^T=M?1hg_xIgBMq(Ly6GzOVeAzdW+ zWw5j!kyYpcy~Z05Tne0~6*z>Zl7p~+X<}D|~)@oo?N1sJ977}NgbSEIrq;SGO zaCJgAmAFqECJBU!rQ$d&_wt$`(dF3l<`pz=UOqf((OVpF0E#kDF4GSYfV7Q@7j##{`x6We`C%KDc7=V+*eZ+w3 zGSDwVCIZPmL{85G{gD8)uJGUipXOiU;*h3PDmx10p&0Ae=TH}50c=#kt%7e9jUJn~ z*vIyK`HK@3bC%M$D_^wc&!>CFH+^k*()KXDV)SS{J#V65qC!kxA4}gno4z@kUMZ$k zO%2W*o!NcwsOUH@8jmL@IbOpqbqKupxf1Yb{0wC?V*+&sCz0&ME<6d}(iNrQkSB6` z(FPSO*>pQ-vE4BNqjB8C^?-lelMO}zcBu_koZ1rZxsWg>*VYw1r-9Cxg%s7qu6CM1 zOD?jIQg9h8pp@={hiTGfO)(i(ke0m919=;e-*vJKTBE{W>M~@bPGz*DW-7`1qBR@& zl{H(~i{?ZBQjMFelsG~$l5z;|Q1F)s5|+dQNnRJ(5WB-L5R^S$hcCVed?*y+d)e@W zkrz-X`y^*B@>V8NF>gLQ|Mv4=e}2*w&E7C}1Z!cz`pF~TX^iF9JjkyZyDD1uPKOc9 zXRVnS`L;inwfRBT=CSRfar<-@f=@rTEYgPNNtwAsM#4FkmeAkKso%)Gw=w&$&hST^ z4&l$Wgg$K`uR`vsAbJ%2pjr;wblC)x*)oV`Pr|POzgB?Izh#}b2o6;2w-gTg*n%fJ zoTt*)s8~?frEcNm$MQ(_!xuQMRGHW9Myq95r23x07Eq;H83d}cA(dta5Qyi*0Chqy zp!Kx|c+uBI^k-jL1c`(mWpSPgjtNMVtV~fD1Wu%qib=bBu;=>4Kms6{F%J=SY$#qp zHV}4>?D4>zj+kTZ1IJp~Y2KA}^OYN~eC_k2%^x~4$6ub*{pu@I>*sQ-*i%5NDO-_N zZH&4~MQiD&@c)fDN@g7;M7w1p->Zyz%YG8dz)RlKXV>R(@8#vz*Baid(INc#VNV!9 zg{y*EhZfXyI=Cz%7zewI+XRf%aeErnhz(#1=(G7=lUD}NT4EVp3CxImi&!?2tIYX| 
zh{1QR1`FLV-i94pMlO=;^)<*IUqV6%?N;a-duRwd?f80oV6(Iy>Fk@<;5m(1+RYuW z?ik-RacR;Yb!?6~YGxfZQAeFJvv{(peRu>eIW<> zUHAA9t2s6Kb3vV)saQk>E#NJ>zoFiP1GA}57#;~qrV+Ta(Pm2~=DnmHC+og}p23U$ zo&!=*FO19}DZq$n2;jui)6`|eRF85~r`|q$_w0=4cP_*#>t`$L=PDamPhln#R0!*k z^=WWORxVT_0~-P&DwE)_dA6kCUT4&COf()#c0GLvT}J#p3m7cok_b5%USwT z1vt`Rkn)J5Z~}q=x=+Fvitfo4vgRS6b)B_v0W|6x(Xw!*=gdz6Vpz%sjXl_jyDh z;Tq$3;R$2x8yLPQIbb&wgy|3*DxeG+=m-4|AC60xVD$-cEG=p>!Sssc6 z^P$T-W{5k|=N!z%#-01CPWa@=o;DMP@sWwc-?2|`f44NY@xX(P2VNifVdIaE{n4@e zBVzC6xyF8R&5L68l@F`~aKmx0eQf94dsuD1azOFjX_(kPsh`~WTl*%f#Qd5MoVAZk z$n=Ss^OlWmBc^!#RMb%@8lU^l`nNazyG_&G_qWe&dJeynGZSCOE+X>S%-o2`N=t|$ zC)dguS@%*6Qabdou-5HK^d7wQs3*xKM%anxpFs$71uY8^fll1=9vvC~P&m=M!0yw) zHT3)59{Dcvr{oOsr>Q6e6{xcDc_52rkd8q!nGK%-=@?cFiWSeS#rVmGck-X5mLXr%d`Sgg1jE2aO&Drxqi@m zrVF@+U4BeTl{;OdR2FopheS4%L=W;x1f~}wk}g+(w)VssnNRp z`1xmp*tf(N?+<%h_wfa5&(Vbmt?+v%^=;v&~aMX9~on|5%OY$s5-pHg6VW~28L!32jJ#)$?(LL;Us1PWCs zd8Dl(Jrym??6Cw=TMO5bZ3zi1ned*$kd(m)X1=MT9gapYvf@v+Gsy$vH!Ct_I(ZLO zLgGwDG0P2N{yf4#N~E3>4k0Evsi}Cvc4|xfQo7M8_mEHC7nGc28Nm%*DHAEjm8JbUBpL|!y) z9WB_7+S4@Yx%iaEPx3^T`P z4v3k@MCY+&X=^mM=Bd)f69cg|RkLfVrXn-7(KQFe;)6574@>V2L=Sg}^_^npMKQBi zboTz0s}?+oJNLHZmSdtJ>h_MAWN~9>%&~6Hu@0{3@riAv3}06p0-YK0BXwS9A${p#}9rRAHV5= zV-r4p&Qj6(dilZ7%i{GfEO1<_{v>&CUC{UH zKjye$ou6(Mem(X$B2_@>8z;w_Aab2Zn{$;VSh@4}wm;n7BAz-UULKs=G&Cn%jR~K7 zAbbwhd;z{(xSiB@(CwnWk7^6)F4JxE<$s+@CG5tOl|N=2>v+xfsF=&$@QIh(w(GTR z@}7}X*>jF+(bzE4h2S6mekaE2AOGIT6Hg!etg6mH*rvcmjx z$&j_eEt%!7Cxn$n0Z~S)N|vX9J5(ixqWzy3l9m6N%P9YeVg#zuJ^p_XA?1H+=_G9) zrqIfuzO`IuN@XsuT4pH}8cLax+n6l#7L|ir=DAvHQhc^dPL(oL-mjsr8I?qaGKz#8 z{EZo69&5yWme~G1Y!Q)k2P3xguup+71?#LmDO!m*uoswYT#E;3?17R%CNN}S%)~DY zowKcgVH`kG9IbHHtz7_MkuV^f#{jZHBr^wW?PGV3=Q4DBcB(Y$sS@4Q&=PFTn9nGk zJo;Aao2}E%==%N9j02+QARZjjWzT0hfaBbZy)bcgkiX{BkPn_t(chsBj&cbw*3;?}G`PS0{qP+E4Ie!---VX_;3 zA=3oro5xHOl8s*6JrRgT*ZQcni=q#TuYm|I=zMmqu}v~I``tsAP9uhjdQX_yp!5^Os)GF@e2 zsl??KMqFBydQ;1CF(J_rbgjV7f__~jYfdR^#V&iR*5pTrqbB)<2vbXfv4=Aau}NWK 
zc=KWb3vYqXrC!e4I0m+ySWFK6PxWqLO`GTH3Qo$SkLva_pGgbVwAhl5>bDt2P zr9I%UWZP!+2SNuTS^KJ=PHl^r&kem&x8?H@z1{vd|ALErs4u}f7W)IzO=gi*C9Gr~ zU`7@2XCN~)I^_YouF+-{e#uC_U$FXv-xhP(*XW7TSs{2$$*qzqR)C1bSs6^cf{%om zkUQ(P?Uqd}sE)d~vc1~I55Kk-yvP`~^~$*U;*A%@{OV}>R>p0NuX}CxgyD`ue(cH| zKQex5>~nA&a%J8r71wNw7Hof*wO!2GA-Z;s8RuP|n^#}GI^I8-GqwKTZ;n;$pRL#* ztvDEU9TKgF7{I~PEeyXn_-)aiXo*90ATXhy=e;t6@4;b3*jlk?2uiucsXKNYy@Q`r zG-CB`WWV-Hx(X(oh}h4nC7$TddlTv(>ZC~RIhiOtd*R&q^PA3{KNs38(}aked*$5c zaZ`xDJ&B|PL~=Ya%T0RXNY;HG8=4%!GmXU-X^NQHmP|G>83;7I93vYUNyrQjW%YXy z58$P8h@KO{BpG1HB@D|k2#3NHbIM4Cx-2ZMkwyyEPy2k%(hlfF|d56W^1~I+yhGE{8Io@z5D`s2sz_wg+n9!Xz=A`@6 zi-}6BH)35JUB>1jNw3SQ8?vrUy@zIU34lF?2_M2P?CMF27Q2?x1%1q(V42#I)XOv0 z=7*{cO2H+j@63W!RlR+pA#-m9{}3T0S_<#5$fDr2oFWm&b9s#sJ?X_#mT@X0!G z)jCvFr&33qzDngcf(BT(7`x!!1=S3eaHxTkBV$^DuVKr5E2(CZ{bfpJIK`_}sPcZz zHynNH8&cPysu5Gv!D!G${gAp2RZW_r4)!$=NZ7IO#cJR46wjjh9$5S?8I#m)QI%>b z+Vb?crmo{@4#m=?QsxOY>VJnC#hB`_4&Ke&@jP7?QY|a3C?Qn~W+QXR%SdU(qvj1- z`II^6_|EQKb$}?9AOrrP3j|pfTkM4%qzD%gM4TP+KB-U6^5aWG50VJf0;&Sst4N$ zd;_rIGfw9^OwG7T0pOThWHCteLme>!yAVC*C9B+#u}COO+>VaD{YA-bC&pI0($gcBW?M4l*Z&By*s;%9v1?oq|WM zGGf-@*$ulV{J-CMyYuUv(G9y3elfNJ-MGC#2ivDY`Z^{KV`L5^Zrqesi#Etq4|r!z zwsx|y^8jVhCk+vFKRAwQSV~uPEDpkWsUwg9IA;P4L+$DvJOy%S*^L4`4 zQ3G3?W)YEfE!U#y<)cSGbY$RQuDCOkHtCq%)4_Z zEN?hocTBcM^R`CaH85WFY=v0QvvoeRU}EPRdtcu>c{N&C70s-M-+Ws7g2igHKQ`FY z%%e@9jMkQpw#~bUp7eZuL}sun0nRt_!Crl@Q4CkEwFvT{c>cT3b&2~CJC?Gro5nH$@%M?lQEb{2aC z)KB(Jo|(Ehoi%M1*X@iscRecMJUI(qE<@ga?m*PD3A^dZUR1MLhbKn>jnS;bQO9A? zcsNNpU>6?(up=h&fHQb#TI;TI)Y0liN&4xrF{%O;Ty4*|L_sjeJnk$&ny^ z(m&_s(ou5>HN>X?DnYV{`lWK!)K1w$1W;U-TjNw$Hfoyv`o+@zEx zu}vo140Ir&x+Zlqe5`4HYdmF$wUJQ{L=U1e-SW;efHxlOI}g;;uEKb-zq2i7@jkG47iUQrR$dbWFU~byiMj?v>%dPOX}=h7v!l!ICXO!ar4Fd4 zAd@P=ai*|a3^`3BAC3eK(~7UblmR4EfBY-AzW2Gk=|Yl;YrgfUUTLLgh>`O zZpoFT>J=&i^sts9PhCo-KsBX|8rm~;Db-w2Q_8$*Db+VyQ_8w(sZ?~c`hM-a#MXw? 
zFAS0;>yd;JhHOhh<`|NtT^J^BFUb;2M6k!A4Crt&mK=V~4@<|$;2`WpK*-1usI$mb zk@>F5t2Qm?5Mwg9A&`;jiG;^%;l+{u0Y8K;Y%}_#<2Lg^{kO=?0OnUv1tyClZ{C{y zktg>~Zjz0U=GTbs+PH^S&yKgc-|U`xK3cXbTD)6y?}>W~?i`(Lcvu$p zc8cy@anG7>Tc!+eTkcw>)1n&>M7;+^_o38z7aZoa^v4FQoC_J8E#q;vgS>4Wzo@q@ zn^3|8_0wugLaBDbD4WbrLP?AGy-aqy4_z&OJw0L4&~0o<_)}D(oiLahNMCv-<*W+e zf$>Eff)l7G4N1@Bo~^#CR#W&M?f*ABUx_}u#=0xGVgk75-yWZCHj?xKy90G znG)(m31S~mFjXpp4clT8E3hlcUe>9)ZDk4cGqz64j4AKzlI7K~GsN=gQtQ&NZAwn% zj`yqTo^z`DgEuGpY?-fks(*mmEUT7!ifKnF+p49WVv|wIv1+NOm|B!d>oOxu@6sc5 zbr}%4yNqD^)pjjHULKi-cM?sHgB2O_k-4-(=v#SZwzG$9qX>CBXj9Ww$Z{onfNo5r zD_J^Z#*6U<5l08(K z!O&7K3uPHE!xqY4rZzIlP(_j;JXF*u^L@WX1&NW9+{q6~*q8k_o`nc~CnU2n%Tgp` zQH9c2f=2%X$}luS07hsuW4!M6&RaVtc6{St)Pa-8h+(LEW9RETCl5pmYonQUqsQYe z54lXb-f-gKT;bMta%K!auzcTg&;0wYXif9|(myMIzg*7}ibceIKpeRDf}^VxZKGR5M`Xns{R zyBd3Xt%Tz9?vziKzE%Ea`P7!^x}DL2U7~w;YP79E@#5CQ(TxpJZ=>jLihGJDBBFa! z+><+zHJ%>#luQl7CbgW_6{KXJVN$`>4falQz3gJMu1^fFCr_j2C^Tb zp9XX_L{B;Z)fz{q+ee%a|6V+}hx2 zyq}72Qp6)5sC7aEUIBL|%jKTXW$da4W7Eq$wcB?H64E8x<-p7S{=Pscd{@UHT5H8p z1T70robe@LG%?$#D&cq0Q>-IOQ4s^xn+IM!FtMKPz}6QnsERtOV~*Xkj@?nmUeUN$ z?h)iwFLh+vkTeQ&3p(w-g$sgxW2##8vV1`;3+@cZ4Cs<8P|`%A6INurE~q=G(;O{e zV2;dfk<-ei{y2Dx%sYpAuX|q@!WpjDU#Rl7&|&HW1Kt;8iYWx$-Ag0=LSPX1AykQ- zGz6R?SE+t}s4Dz&_ysRJL8l5{yfe=hgyj6YWeNZTa+L8|TdZm}5#994dGTJFdCxY=3FJ zEfc$nOdHi%U;sLq|?X-!g$cA83?015a<&dw!|Dh z0YAC{wmar&B)jIfld+0qzXbuI@GhYPQjZEV6tK}X34e$;QbgJ)v5$egT~ z0>z8V14VWr_ZyC#hTnG98vZw!>?~3beHLMf_UL)ZDSw1~xD?YB(aGJ|QxZoq_~o@S z3!kwn{ya?|yq0}%a(B5&P{_2qoP3)Bp^RTo6Ur(Y{X143(gbd?)z%`yMnxgWoVb{| zBM>oNDoH52o~1;^sq92nLb__5cn2(bbkJF)+=2Tu)RAzst1wAM2%U!|1CF4Es=Rl^ z-y<1%gClrg!SNS#VwYsS*wc%7#c?Dbg3a$B)ce3gWIj(V91JNgw-M%(Z^jwSF=%)fuzY zKCskIe=fH3`0UQ(v7PO+JKJMB&(H2WAKU4Bu+#SwR~Gcm%BdJ-vzD@$rTl@V{3ovL zpExw|xL374TO zc|-x3AL2GG-|p41U-#?SGZ{=1K*c_QFf)GZ9(UoBKy~Qal}}@j}d>G&;x@o5+z zg7hFoBClh=z!lFZ$)4X9zmdO9K!qPR2j~Vmx47rBnvx% z5E0|$o`FlSu?g1ICKBsv*|86j@!};sw_Z%-j1->2(bqv*z_BG%Wu>2tNW9EJ1jomk zgi?A#oPviGNE9Si0d*D$4=6PU0nY3j=!bVsNVd%(!e6HPGnKg|V^H=MVICzIsJ)He 
z!UXda+%CCQ67{Tu*H3> z_&)hrT$`FYgKNAf<|&)?lqK?(&AJiI7(X7%s+i5H09EZWj~-pff}zJ8#Fcc=?T+c| z(^tg&#J0CEUwG(PBO2E{P0b~sNB5XSu*m~LcpVNd!NDMOB*=gp2bkbk5Z=V%*xD3v zn2jRcOs1o}jm0gzJ#0;!q3CLssj#NOivd5pbNb1{Z_#R|iW+G)twi6$c$Ll$Nt_%) z2MqAeYWVTOZijey$x1%{bPBM%lF^__CN@%%owink2UK79GLxc8Znn`J4pSV!@rXDz z`a0kW*5x?iwWs$oFlfJ^aV>{DVZ}93Dq8Z9iw+ujnUGOC`?gQG9MjrI6nuo^MNGIu zVT0pEEEK~&o|y|airKmByaflvoLnIe8lhM^m$hcWMKL!WIpRS~9ztSJm5YWDNXz#4 z4-pyVv`3C~dxy{;@`WQs5`&GsDGq6l7irfVFYD&KvVaKa`#&69{%JY{ghkbq>r3SsQ9000=2 z>;`mFXsp{ zIcsseWc{4AC|W=@r1-UTzHypfpG{fD2Y9)UotZNx4&OZBjuvl`Uy1yHNt! zRP`#dPMDT__BxkY@ild-6G5dfbV+HZl6DQ>E0ki#WmSIJ?M-zws8TN^oiv2(LQWEq zEV;VOihXR*(q(GTU-B3~(Hq$JbLoXtf}|x^a?=@@gAmn{B2Tq0XzQ{htcySD&-J=?$ zJvpRg=el1Qf{Iwa&ef=~Iwgj;D@{?7aObTGEGVr&U%;eDzRr=*lwFM0OEJ!HV6dZS zs+4@mJDxc3gq(PnTzKNd6B3aux$q>dR#l{tLK{fm)h1b*kNBETo@jzwF<;--apWX_ zqKU_u4)qPqO^}8$)XoB;9VLf`9+PxclH+nes0>J}f!ZUUvk4K1Y z1lbrf+`d0z-r@zS-!F`yGNBJaMX~UA6#u^|pe023k7&5?Jp_^+n+7G0ZDi6PftY{B zW(ofn#fTmhXa_LKbx9Z=^aTYRY1o6Kn*H*^!J555nFvL~EF5Qtge7AD)T*S%hx{!) 
z`fn8ccM2F)%0$`!1wU!KadwX3|Db1HR6z7IZo>B|fkYa@A5)MNd9Vup0cm71osiYw zFlt66UAfjH^uf2B{R$B#R!wshn%793CS@&Mz9^O9`EQ|SP`$A6*n49y%xiF%#24TE z;)C+TKXGP!gwz8!4~!j{cc$MwcH>yWY@}kU^X&_FFU&djElL;H?U*tCApQI4bIzkb zP0Rno&B6&vvzWGbWK%k&9>P!+on6B zYj)49d$?x*SmSFa;JyW)YnVru={m3J#=-OV%m5Xc)c*>Bi!_8E3&vfi+c=ge92 zKFZ9d6}*tv?-E+QOKI(Y{(iwT4==o;PPmVlDcr)tdw z7~Rl*2Xx_ZHkGI*vs}m~R&8O>jQv+s;#;nV!tS0_Mm|aNo}i^Wx64$g*Y+0hw6P6) z8XAB(fsqNzROt7#wYA9z)FPv>pCXwysvHEKyW9iH{5eW6_rQ>+FCZ7P%J_@y76mf} zOfKsj2wV!o&eQM5S-z4xMDj_PWG2|RkYvBu6NcQDFuY`eo)Dt0P{M&iMqx6??)EWY zWr)ugB;L-u6q7B@!2t5N&?~@daHGNQ1DMCItOqWyX!VLN?|g3I#D%G*hsCwA;vKWa zJEFzAX4+!=+GqE*i_h^7_jSw`W9NZ0vqfj3MQ5YA=f<2LI&$NgWs`x2nOj8XmUv-B ztgv#nuyT5>SXdb?+&2Bv!@^w<#>KPCW7(Cn*_Bg053{$zi2w(-Zn?DuXU*2O&epb$ zZ;56cAG5@<2i)ep;<~*LUHe4qzNhCkC_;H&d)YTEFLcxFBMsR)bgZE#Hg@6&+=>#X zjL@`RQ4UPx{-4kcCQ5;3@ETLjBMK(ZEYNKlk~Q4qRwAEz(LBLSv^^e0dr7jU5K4Uq z7?qBCVMi;?gCc_bG%fu1hXp9HpWA&Fo!|Yr95UaiC^$s zWoVPFT=bk|9ZRl7&q?aItt)_Ada8}{z~bvywY)idv!jz zca9i*YU|kOr09Kk2&RE*H!!N5WT7R=FH>S&=5~C~(hCw@vli`kF4YcRsd$hs z%LP3!w?%Qe4q70=!as%2D&bCBR^lU4^({{l+7xiKGIXcnMwF2aQjIAftG!}rpMV2Y zEk)WlKmAmxQJv99fJ_+58Tn$IMQZ+mIE&Og2XPi#l^T+F;w+L!t6bJ~SxeCbfdbUK&GZlWEClG8y>2JUnq7Z@+OtfUOMzb0L|q0UcBy&IEWLGl^XkTl0kwW z*;M7_!8Ba=vJ?FjaW;v;>8N`AW?B$0bVgM&cFt#!HKUw=j^2bUB zDB~q3KLSc{Fd*yGNp2?)&jXKx_c3}xCN+#$7RkgGa)HPic^fB-yjLrh_++nPvsSi& ztL#0X@F?;U?_$|}{^8ycrtz0bmagziGO+cEn7UE!Ls!PoY`X^GeH7;uu=DeE8oBJ?%+(eS zjaOcOC7QGOfoJpg2ggja?&I+^5A7orOY_dAd1Gno9;U4$PT|VeugvCbnruQab`+Z< z#qz6W^Q)$=#j2ZUtDEnaiR~{uJk}G=O%@UJ7}229y!&W_G$n32ERKv?E`nyq$_}Ylgv>j ziYNP~{BIB69TqDO;K{+g;NCoY9~+FB>DZ(R4j0(RHfJpDA0Ov)o)fwc)7IgWpD;n> zALnmqE;s*heSKDQrRjd9qj|p#(quL#*i2wkBsn`6$Itj?#-}rmolO=7$BfG-2A@r| zHh}au2&BJB!LL&AYZUyL0yh6>CJ7rUn4mP0>o9tVfv@lvR1pQF5($xb>?_>Q^bqX- zY%8RuKkL+i0s16IA*!sxTKc)HBh1pTRonucxvrBs^*+*fQ_EIvC zYE9u3YEjzaXNAupl&^Nwxb(ux%yz<0#qA_*gOru>X__wKZs0z90sG@)U-zw*yQ_Sa z=Qj&R>ee6Ob&?exRsEOXJeAltU)~{|))L_zWYU^SSwaINef^iN3zcXp>wpc&MpjX5 
z%X~4TY%(;}`1Y~8$EHKSb294K7sGbxj$`-iDfdkOEDW|ua~hEF&(t;&4#@(eEQ(RD zu1JZKp!RQR%;YA+%|~|%j=U6-O$Obk!A{eC#we#hO_i%n{!P@jx^%fUlu4VPxX(yh*_pDzGJj=RbJ>Zq4#^N6 z3`X3S6ZT$YKmoHnnAn}kvNOYi31%Zu(lChlszUvvM|{i9-4=R-%QXzY}S1g%h~Hr^mlZ9)yts;7C9 z0l6MKN2aQc_InKWz!HQI4>)zA?itXplMxUB4m+Fde-QK#u+faKA$th~V%7$Bmm7wM z>ZQU;1kLTStPU+S(-r9jSqG+s8pvdLqr!~^vG)k{$Dcx}R%}oN ztLa3F9XTk)04azHYidg&m*$;(np!JtGwv-zO zk|#@<*}F6nfAAS9lxtd;k+gch;N>$FUV(8mswOCzUzvHB2U(F9IdPz0k+#xptg{ zhJ92S5+wD=N?;&B!P?>;H}#wCOmYzR~3l^JiE-?)~|WoEl|t(MF3Ou4MQ z4Lg6WRu`F6tey)K>KSs;6w=g18_idEB9_WZW|J(eLt6k#mfp+5B)}7<&=ZmLN_i`u z>vEx|J0ihZdx5NYaULWoydQ% zrA6#G4H4v=^^BS<=2uK_V`;lIX`81zS=t^=TB>}N)33064Vrv);{G<4cv6$NTWolq zC7#tJ?h+4Eb3U-1Nlrv%3K`M*XGSiid$&y*_@qquszvYx4~?8Uiw9MVvc6R`v5l18`^c5{aSnuJ=}er1*N zDLHrb!b^ZO84*+Fo|h|y;cFor##vd(Bsi6_vK_^Gdxo@0K+3YSDJ#Uz>g+#+$rj1d z;B%^vJ)-*Phis-7u`0qYW&uVE2NRJ|1h)6R0chi)GRFoV!bsq*4vHn_5I_q7BXS^r zgK1wSnf65-m6gn_k5-B&s28@JK(F+nLV$U5RR#xohH#Ql6TV+^D@V$oIN8{QgMb=k zi@^*f`EY$_L$j}=<@u)M>GK^8^=&@Q12yxB`qRGSO>IXzo6(Pi%Y`EdWV7v2nZaNf zT^3ZWiqRKkW4P-O#^%#e?kCp#iPAai8WodH6wOs1WZ3nPng+c7j$!6GmU>W4ohbTF z!%P-SJdjLW^PS$gy(d^|o0=--Z+WL~?oc;NKCMZve5c`_T*VPhYV|w4_vCd1!ZT&b zNm=nk!BiKkbXziG{?rkcRHsdX;tNZac3G;NhTlEXlFQ9R@wAa8ZBLdgn1VAua_v@= zXp!53v=6KYlgM2`ss9QqjR!=bdeW-FwC53AgZ7Bto!ITev}dM~{Y*AQL896!`AKGFrOXp(X0>pPpty~KRtm`Zr42p_GAlaL7h9i!DQUtzGS&6I(GQmZ#_%oNWf{)BAnqJxIgU*|_P#MN6I(V` z+;WDJ&dRv6a;`&s=)CBxOgJx09ieB>FB{8JMqAPtiW@^pzcPlFjUBgao}{fLZYxRX z^yZ09D71pwkFm{vT!+X;Vj3B`hIH2p^iIzHFLDw75D&sK2uoLC2fcQy#NR(d$f8Ub z$dXn9IR^bBQ_sozc*m~LZ{Iu*jD9tK#2{Ql`N9_|XhVPt?1Maw0)=M~$f7_QK%-FlE(F{$03Kw3 zeV|K7#$Nps$N+0A7=CDmSK)Va=N)vkEeF44S;Qas6UJCULy?y5+- z_KL2(b5A8}_Qq@WlBZ^?PYiBdu+C44{ z3#G~LJ!ZjMG@jW~z5q*kdHaExld!i-Qe#QpOe&qi37n$Osojl`=|(5k%bx@KTv)eb zy-=WiYxXfBXeISK8?5v2Cz?WlYMnWp^i{`w)d^S48^#Y^Rdc;>Kl!yM|4VSuw-{SK zdOESKKjAt9pkI^i_1J5%nN!RzuyBQGIzgjz>gc^4&WLgtX$Z3dH|}sc?IK}}8qAtb zG%OaJz%(*^n%pQghPCj_C$7^;Oud7;9qB5Q^iJ`gG3R6cGsK2=qGmEEi!wftmQ$;| 
zlRX*5SJ!_voby@d>$WDg6QY$N=78Yy;!}`$G9as%l{g0p(<)}u@eE2EsL6VXjZ=A? zVHb`IS8$&-r;1hTmf1J+>WaB0h2O~)b0z)0ykf2bkQ_lG{7x<%fc78J8vsi^jg9>s z)c+nv4eqb2N$-E3e%KrNhi-ES#Le4KLH?mdjDGwqLE{TQ_YQj^)U4s0?C0tk`LzwHD$)JonLfS>E z-T|FXq5-$kcD95bke1M}_q}*o8Rzuk$?96>L)Q;>pL%_OWy}243a_Ae_5qHfa5mIm zfBChS0pZLhnaVg*Bv^G9&a}@OlcD-}s2*FD-GAA=5_)(sx)Ov3rNdKQt2XEKMAAj6 z=biwxgJ|4(?-;kD0QRNmy*&UgLti{weKGg!M>GB zEcyzSZ%jJ6XSv;b6h|V>uuITu4yzLm>8+_Oh*AKU5xt&qPR(2;2kdX=>2l_CStaSx z8lvP<0<}Yk9Vsw8DlQN$)|ehsZ12gAV;zTj!xx7du_HoMmgFGBu{4Hd1RYpBXrN+9 zCE<=WhAU#F72)=i+6IzdzD>7_vu-o7QMfa5q8m4hD#9lshdUzSN1k8>Bd0^*V_n@{ zz2RS*!zVgBPj>W@JudZSLBQ)0eu5xfqRGW|(sX!07`bxkayp>hWrfMt(OA~cSQlfx zi*+n|S8Q?}C-OjMc#0*1=c!>*8J3aHXxIv$M?F~AnnYUKzXkYT!rna9kuuq5_|L}x z$}??QwfkqDPT0$F)#fDE$C*6dVyCM^$-u67U{}JvTeLk4y@)-^)ot;>wuHR}cn`SR z>PdM5GbS;(ZJ~N0xM*GqESWPVgXJ;)V?#bs_xxOdH)#HDd&^pq$;U5N zdSvEGF*E0IXDk%60x~{hqnMp5D7x)HOd7HJQKeF(7F2745t|>)f1nXtfGs-92_71p znn&bE*uuv2yN~1+L)OSAW;g55kLDIbvuYd9mP|UG2<0)_+Uv=&2dr7Xfff`nxMo=F zA#P@LLDkVjcd#0eRs7ZiAr1lG4I*+>^aT^;Zk z!e@z)e2xMlBoa!oIVWBC0wq$3L)tk$JOU$MnU<)g0}SL#lE```a-l$cF?#VIkO}MR z-$!Xv9Kq|BeBD9;)wm^ywpz(K#?97vi=5Y4#dk2Bzy%3#g2T+!sv%820rL3Nrs_XUF>SvE9yjw(1 zBPdOiL}?HUb@Hjsqf31^9u-fW7JK{2H`rOwn~k~2-!r{YMH)I5fY#Cy3$T7cAhgrB zyB`a1g-FaX_TKMu13ZuAC54R0P~?`~o5`mL$c(KtV`5P=Q5*}4TDiht#>S$cG``yo zir!0V$k>!I2XG@vbk~@BpPwnSpgnZJ02oh5ho)BYJgx~*I3!mIS)Vs*P*#~^X5i{u z&ccWk|2F*FBXH*ob@MzjBN}sRDpO!}^Kvo;a_M8PVyL#`xle&vfl4@3tyLn@m)Cc- z)hN|$%-!vQFEo{q&^@@DHmIuq?q0{3Gpah!l`w`o`0jI2s|Jb~bt~knQOybUwMN~{ zLjd01m`AM*RE#TJJ_y zZ>MrTRK?DH(L(kuQFFA4^1$`%gQbw#ZO>}A{pb%lm?-B`_K%qR>y?}~Xt_FVD6IfA z<^7&f4S3aOtkFUk1ADjXl&s)60A!rt?=@p}m<5A{jW~4uOU(<{olz@7SJZ~k9knC$ zw5XmQU_AX%4@o++j&*&&g;F8i+sZ|}{dn8b@fki+7&RdDMa>8|c*D38=-;Y5L%R=j z;A$}E)escBh*irvkbYCtw85Psd|O)7WF2^>7H-3RsTNaY0VXUq#Ak(HTbWM)J5te z4UsL7K$s5kV}bqyO6tCg%2)>dq#1x6(fnvYyB-Zxa^dyJbz5juZ033$>Zrl^1!RD6 z%%SU7IyF6w;ysb~#8og;I*Ny^-yVjMbSCr>N7 zM`ywclN=t$qCfl;V7Z2e_TqPBgtnAO;f}QD=;k=H;DGHyc=!c)yTWazMFi*G!-LO^ 
z3x|-k@CXHOP;d~XsxB@VI-{4T1PEf*Ghy1Eh81}@=9jmv*me;W!?v=N%XQ<=TpSx4 z9s=Ow*u;zbVue`qDOco5NS5y4fl!Z2=LX1a2CP~nNibVUkpMGYsLC7}7l$0rP3SIa z8cYymU@^sDbkcTN7ABp!wWXW1n3H%l;@Z;1+C42RnI98gqW3}2C~3RgY)Q^yXU#=b znAF9gHjJ7IE77cWrh{~4KzaAr7Ly^O(jIlI4y`AwmvQ|iX_refq&W_#P1rbJfzNcg zCTW+l3np*()t3t=sl$*=PoSLuAw~gf1FbLxVjG1eN)o2PPha&v(JOY#i&2szG4%)G zZ|JgD>1(i~R&IQU9wK7Fymh4WnfikyX}&;LcTn&m3Q7mtBfVXn?T2~?P692VyL})E z=!C8l#|0we!X*m6Oci2mGh?9th)(HkOUu3?Nh;Vr~ zmEysauWYU^?!zM=I@~iJRW$FYnd(egohh4tHneOjUoEVfJGo#@6droxaK>nL6s-p9 z=PMVU6rUVf4qlourot^Vj=Mh2?R($ufz=;Y3NtboXj0DoC*JFv5|%rc56vC+B;v@0r>2cGE)l z(&^>a9`V$mc>bB>`HA@XiRDuhDR+3TeZ^fZy2B_SWe+9oo8$J)w|t?m*2u{6zOR=i zingYFMPKdI+`c7Bwgb~)ZZ9s+9?n?uJ>I(plfwx&Bfij`X3pYDIbAc>>B-qW-xvf& zam_BVdbhZ{`^Lx($N>3%zARO?WfoWn2gHLt;(^CupG}Ymz5E@5M1T;&;!1jJ;@%oa z-rp?97&o~KQqT`6+LkdON;3JUGx><&rCJ~Z%9tpQr^sJR_qFbfh0d*S)BT>0CWiFn5Tn>CQ zLn*U+ralE#qaC-mA^p^&cUrl8_l#xPShtEz^hYHIsu<9!ki^rCI&#o3X%?~aXiDw6wi-%?or>g4T>b~XMlyM>TubKDuAJn^{ zUGn2R@6la`ANxxw+>wWHow7NYU}?ej`w^7=CJWbCt+bN`^nr=fu$pfSw_kqojDAl` zir;@{D+zL=%FhZ3iy8r2+Txe3Z1F2EY$+zKYbhozYbl0RZPRTBV)rg;iTOhb{Ko`W zte2nNyIck46H;a$3NNZMUGr>lQt&`&zsS<&<-*WX@;g68X|zSJW@iDrWOGi730289 zYGBZ@SS0C}zCi6F8F}9Qo<q~=I|+)izr%>^pR`K+>*c^C=7@nV>!g7a{yPauxo9g10I7CIZ>U zmkI6_(Y>EcQU;)Gv3|DxTO?;A=cg!>iQ}5v^iy-nQiGgwM4n6{o*PaB4-`A8qT_eQ(Udcc@@(`_7Sn|LQkRt0E9W^ zWlH-|Z%a>|lvW=??)gApbLf=(HI>h+%Oq-CXPjv3hlE2qqPCt!C9eG{{i-jvJ&#R% z_+;yPp7TL-z_)QxE|~n(Y3l;t-HLvc1D+$746A3mRxc%n_PBHRL@En55k@KHm@R6S z#|6F^BsgC52G%G0z9#j^-?$&r8kik@ucII2kdi6VcQ@C$h_7zf;12sHb3dW~07{P^ zbM%cw9dJzV?6vfeDoEB5bpj^GbtSKvKyo09fA;@H3iZAGc-~RSsYw!gC2vgi(mLjj zy8FH#byRaYiJC3qQ%Y63&Je~vNxOSgNt#x?)?fSL5jK{UciETR4e9F;qhAqkHvq?! 
zxBVNxWzqyu}#mPx8wVHy27fJcP|3W!yb08_q5F_N`OJeKzi@lqE^q6c9f zlT|Tk3)Bvndf!b-`Og&mPYQlc0pUi(@`Nq0M6q1DbZ(3p6UK@=WU<>i)ouod7LJvL zhen6T8QXS=5Y1F6pL{>>`7Q8KVkEhSRGO8lNz9n!mMQ?N1V@s{$jOS(WJW(kjT!gz&xk;Y#NvOmZ0ZrfuFq~tI4h@)ta^MieQ%Y( zH&dv1_R&8(_LXB|eOsbrU$W#8vE-2@Pautj@mtYY^AVX^frhMZK7-6 zvUz{X6`Xc`=qgKjHqADy0Bl|YxJfy^SD$|4X%hJyxOPBAk4eGhYxA|{I}Q^`1R&_h zcf8*HTKDHq;PE61_f$PLHZD}n)F!;UrcJ9h_e}75 z>9x{?ZS&k?V&&dt+g^!x{^sz)k?(eYyF1bRh}d*UJakSRyd-`mCZ2mqbWiHoX#;0x zOJe`^nrk)Jo31q_y<6hmEs!{+Jbo&$;aWqguoUggSh&K*kGcFp>$C}S8oZ2lQrIr7 zQ(xc~_WE+ZvkyH1F<1wbn$d5MzUS$Lhhe8@reSXA?a{A|u9WRaICoAP0PZ9^oM>Dyo`#box^ebFS7jef{FpqoSiCZmeYD97UM(?;n+NEoXVAR`nyV z=jdMR?<|HXAozZ^BC%&H4yr2>EG0*uSK$291=9EwUL_JjqTmMu24MIW+rfCKG8Q-K zC?Q}n7X03FLPrU|Sdc+8$wV>eD1|Z>idi`T_1h>08H@F{1F?ITCm~dcEl<7xDx+VX zV4~&p)4#&N>6a_Y#0`Yb4fF|_W{=)GWEN(qk}D=(qi$)VE`xAAU(<9%oz~;pq0V$5 zl}V=wo5jrQ%Fz2O%wpnosq}fNEGEiW|FkXF+sUel%1Yq6T~|X@dkWVbEqrxhhLjd^&D>R?SO)!sUl1&h!t!HBxbA#?+eEaP-|qHA-~S2^IA5)|Jqnfm(}f& zn6ozA_h?6?14@-F(Q|lP$Dzt7;8)sNVH!x&bV}NUP*-A757v=yP*4341+35hAKWKf zDuxe$T{X>TfFkhlNjG&)+b{}*pCAX;i{C>AlKN3#=ECgcN}w@eZvxVcC-l}P>_Qk# zuXnxHm9lzgcFfd55MIRe5-&hgdnM44ux}S_tqX^jAMUzQlX&=Xad!ksPV!2cT`Pgz z2|IjM?OCjOH+Ul?M*3H}&LrB;iU$dg>(gq&eJg?e3Ht%jc5tcS-REz-lsIx)?CeL9 zGis8ymB7A)eZOcsu=w1&=Wjfn=zK!#!1|v*%?DS5WwXz{^{N=CoqIN8%eOTA(qPJ^ z2&u|0Y5nyeijd5*%CaFr5l$nYSOr5mmBhD9a?8${I~fRY2?14xK=mW_eT5&LL|P>I z^Ia}Tqy+&ZE%;@m1-~S|g(G!}nTd>85R=G=Dz!jah4^P=WSWr?om*FlcPB+;d9Y%& z3#5@;gJ;a3NkUZ80>>#i6&&HxC7S0}K{S;i#2`6U83}47A5cCfos9=2Ci=RrlME>3 zATNtrN#=Hi0eX0GuefTvG)&K}L=b1u)9 zg0q3QHjD1^xx(e@9Sct-s`rUi`xgh_t-Y}&@kmr`?-To;Sg}9(^DSHdoz>{GyB_9+ zAA``vJWL%Gy2x;frl;CLqSBI{c(bZQpwMvPABf7b725xx7!mN;CfyS{1#^0aw!UMo zT1l(5F4}(VRL9A1>`*=Fz)MN%NQ+)qjxK|Gc}kG#*vw`IxndWG*3AuHpl`t@2sSs! 
zs_SfSAaW#GgZ(4w0^c*|M&OEXVEow;MS~sN*3!zO_!B;lu!5%<9r_tEgqZ|f4!Ykr zJ4sJH#}wi5U*B|X6D+3WZ}zbTF|fnlf~m*A<>Z_1JA;6V@Aw_&3_RZcAoD~h6{?t- zMCz&<_y)Js{?cI1HA_J8Q6if}?jB^8fH2$)_7fheU>Iq_5Wj~7{k@9EKTSiWPx><4 z5hfm|kj)VkVLI{@!7Sl|_9}~-vCW;auqdD{eHj~z+DVbs!JV#0BO%mEZpk85B(=Z=1>v+n%-(FlEH8U|3rpk39OCG4xw55@CfJzc# zcTV9vMDXbFbRd->YDQ?(BLQo>X;Csc&eu;DIA6#8MofeZzFu%0v9N3F#MAP%b^Iz_ zYc?u^MF@%uatY_7`Rjzd7TuPFs_=++^E^a!#aZvhnJX!LR61-JM8LaOTSCvIq1 z`{r|QXsA;nHZ-iNFcR`n@?Vf^<3UMt9%{VnzBbgLH~89x+ExKPH4y8?j=p)65Kzvv zU*)Yc)nf=+$86eGMBkgapUElEwL=-(p)2qEO5cK@cn0}8`(DxapD|b8pX$$h@XfLh z2ySK82ZG&#TsY>MDs?VAa%Gfm_!j+9Cwf63>WT#Qb-}ezi>m&JZ{^6P;GF$b)ZJ5= z-2x5u9jQ0z%h;~+CE!M|yU&z*tCi>b@}dsq%Rou@oroJd6kOkp-QNVmninaevwo#n zD%*)W#q18r9of;>qI?A?h^m_+qyQT!)z=0sE@R5Uj0$M`_?tN>|6*QXwZnFroa=pP zfmU-;j{WUw%gn&Tl7QNxZo|hm(MCNitz@;qJy{~#Sb-#|mN-pP(Tkbcf~58I8MZ!+ zg?c+a)f+y30{^Fu9Sh4?MB&<6ZJjEdM>B=LqlW{sER!KTV)+OOIWW1g_n$3tCS6Kd zJ}xB@{)C>-C0ds?BR3YU!xN_jjdD&_I$$QSkV-QJh2ku=14!24&j@<4SB9IJ)H$|A zeO+_|C@mx#S8>!O-iS09s6Da#FqpG_OnEj|TTh_r3c9)?Y7c*A7zGc@pq3EkH%lw< zIW$nnlK4Z0E;SFe?S<+i>BQA*>4l!7D@nxtESRR_B%^P7sI>p5B7MEC#?FivI}5eOjQg)*>_u-jyv-c7m` zVrWak)|j+y6K&fT_yqh51NY(p1l=iTaMqCwHO51YVz5bU>V(Ak>f{@fV#(G8Pr|ib zbha+EFFd`_zZ4*?y)Kw98C+HXz*wwztXzH&SU;qSRU@-tcTNzcyZxqAMM^D`s! zgVW~|&KAh-(TZK4n*?r*shm~0>KiBLgJ18TJ1(}I6noE#CkI6Przka?c*|0y}uwSe{ARc&9w4Yv=xi`q<&dHs};yaHeJ>9D0 zIa$*duW4H;YfCuyLH7Lc-f8Qqzi4*Hd;Xf~j+DRT%V(!M*-z5n5cfCCKb-LIz!jk0 z@4dEn_PIo1O|q~dUf7TEQykcc6+` zHFs!X5A8CjlI@!Mc+D=_si=sXD}M`db`s+3t(^Bbf2Rejn%V91B`e{bi)OL?vI)p*H#XdX~K_|d9|T-bq}rRsGeLe=Y$uCEJa_#UhSu+7d# z@|potk~?KHkCl)?B2RyVJQyk|YE2SL{7}2}s1`O2qi9^Xg`2Dq#0f|M1uCkDJ2VR? 
z3DqKYFyNXEuHdHIL}O5Ej`FsDPr|-jN}Tj1Vpb;R>w#kW{dF=I=!1}f5n&EEc`TL@ z5as}W7GSoGA8~(=^m}T6-IW>yL7jT24f-L=2z;hNCl6BPKHxJ?fX_5)E3&>Sh5Nxi z0lu7cU@Zgf#HbPak@7X>0rb4^hQ|H()%3j(wJ7(oA~EDzL1`M6;I6_PxJB9UJ#C)b1oK`?aAsoe z<)xBkGw=`nNi#N7OJ@6*%?;Al+Mbz4v8-jGZJ}9owZCV6?;g-NNIstEM&d*bK0@FBGWx#GOF*Aj?fZJ4 z0P3oP$yPhCS7yxR%`SW8G^D7~sl8lnm%92;>Mp@eVZHVMY0taZx>gSw#EQl?cd3gGZOCG@G0UeH_k^YK98~yURh34T)$>mzoA$+<_9dGR+-y3qG?_f| zWc7dT2T^Uhh>&9=7+;7!95rNEwX1uHeSBY?E9k^=Gx`fjRrz!{%02|45 zvkn=6O&Wdbi0LZ_?6y1>NkNx!IJutI8_Lt`Vn<(Cp*WxTtbwWV9%SQwgOdA+F0&(- z#0vgy3bt~ZluCmNXTIs+5+G{0tWpil1h=npef>1EcAx;|d`s&oQN0P9KyRX20>oJ` zNTlE@1*d7$o}u6@g5H>450fYlKoAJl`CJUnQy}~<;T;NcWk`_MZ2lE!*a`y((iYVT zN!s`vJTwSwwM^s$wPL(7V+UxKOWWkZPe7P#Lr9|yMncOH{tY>@zWxIwhq{%`|7P3N z;Z;ZI%`;P-t5$!?>AO1e#z@K?xbD2>Ou775;dU$KFHZXF;{LijmV8^m)FIONvA;g~ z+T_gh%ce3?g{qvJST;Al?{X6w9^H5W=!~o8qS@`s=4##LE6b)5E&XbsEE%Y~8K{c~ zc7C&U!T8sE5P>7&((-BBGPHT5=P6qT=hz76y#0UKKYv{G?@d_20!y|!Y4ZRqJuhA! z7}!tq<1fC2G5L!-6p-KMU)-hOR|x6@0(-sPhzkelZNWPoeC`UD0pSV=+OdhW0}!s~ zAO)EiV8VurA@4cUBHmNINnZ`rAXH$c5D2~;2>PNlI6+Ja4%q9(&o!oE_D))H;WyE zC++dHh2_xu;@B{GwH_Tg2@_phm0k1=1F3fC3cT#Ws<|6Vg2FKDMh1u5;g`+^Kwc;Z zUKs;QFZm6J_Fyl0#U4-FM}>>$FJ3-3LS-@NHW>XLxK4A*?h8FQi?poWoj;+?PjBFo$*>FO1=JB2SN8 zmC+H{0-1v@C%PXXgCm`H4hPM@YgpkR`MX$t<30!Ec8DXS$qeTM=jo+BZe;HDr% zK`{lJDX5~LnS$LE^ignzf>$W`A_ad$0aF zKHwT5#>qFm{@81ey?*?)<6>cR!nhSa#PXp&gdkz1ho zcEr4g3=tcrTq%D*m&eE-0Cg;Z{V{snU%(iAR z7Tb^~igCH8Y$<;TS*JI(h+N>Kyr4f%Txf;&5T-ZEQq2R`sDcg;Kb!&p;MxF+x>-m~Vj@Q$?*&`8(Z`FuCO=C<%V z*FqNFzZT;8R;tZW)X4$uowe{DdMw7X#9MgRT0dXQAL7aJAMacX*^x&X3S2{hZ6BFU zyz?Vu-ntfI#d!J6cN<1{9zK^TTsw2x=cdcDO#`~$!?lm7P0@$2uPQW9s z6#;2&?NRv9<$Y_14KN#BYb3L+wPSfuZ(OT^a?)BW?JTeL=0WnbW{0?At%8<^Yeirz iGcBCaJ{?;&Y; +const DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +const DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +const DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; + +let summaryRetrievalEmbedder: HarrierEmbedder | null = null; + +function envString(...names: string[]): string | undefined { + for 
(const name of names) { + const value = process.env[name]?.trim(); + if (value) return value; + } + return undefined; +} + +function envFlag(...names: string[]): boolean { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} + +function envNumber(fallback: number, ...names: string[]): number { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function getSummaryRetrievalEmbedder(): HarrierEmbedder { + if (!summaryRetrievalEmbedder) { + summaryRetrievalEmbedder = new HarrierEmbedder({ + modelId: envString( + "HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", + "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", + "HIVEMIND_HARRIER_MODEL_ID", + "DEEPLAKE_HARRIER_MODEL_ID", + ) ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? "cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY"), + }); + } + return summaryRetrievalEmbedder; +} + +function sqlFloat4Array(values: number[]): string { + if (values.length === 0) throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} + export type CompiledSegment = | { kind: "echo"; text: string } | { kind: "cat"; paths: string[]; lineLimit: number; fromEnd: boolean; countLines: boolean; ignoreMissing: boolean } @@ -270,22 +323,73 @@ function normalizeSqlRef(ref: string): string { return ref.replace(/\s+/g, 
"").replace(/"/g, "").toLowerCase(); } -const INTERCEPTED_SQL_REFS = new Set([ - "memory", - "sessions", - "graph_nodes", - "graph_edges", - "memory_facts", - "memory_entities", - "fact_entity_links", - "hivemind.memory", - "hivemind.sessions", - "hivemind.graph_nodes", - "hivemind.graph_edges", - "hivemind.memory_facts", - "hivemind.memory_entities", - "hivemind.fact_entity_links", -]); +function deriveSiblingTableName(tableName: string, expectedBase: string, targetBase: string): string | null { + if (tableName === expectedBase) return null; + if (!tableName.startsWith(expectedBase)) return null; + return `${targetBase}${tableName.slice(expectedBase.length)}`; +} + +function resolveInterceptedTableNames( + memoryTable: string, + sessionsTable: string, +): { + graphNodesTable: string; + graphEdgesTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; +} { + const memoryDerived = { + graphNodesTable: deriveSiblingTableName(memoryTable, "memory", "graph_nodes"), + graphEdgesTable: deriveSiblingTableName(memoryTable, "memory", "graph_edges"), + factsTable: deriveSiblingTableName(memoryTable, "memory", "memory_facts"), + entitiesTable: deriveSiblingTableName(memoryTable, "memory", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(memoryTable, "memory", "fact_entity_links"), + }; + const sessionsDerived = { + factsTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_facts"), + entitiesTable: deriveSiblingTableName(sessionsTable, "sessions", "memory_entities"), + factEntityLinksTable: deriveSiblingTableName(sessionsTable, "sessions", "fact_entity_links"), + }; + return { + graphNodesTable: process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_NODES_TABLE"] ?? memoryDerived.graphNodesTable ?? "graph_nodes", + graphEdgesTable: process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? process.env["DEEPLAKE_GRAPH_EDGES_TABLE"] ?? memoryDerived.graphEdgesTable ?? 
"graph_edges", + factsTable: process.env["HIVEMIND_FACTS_TABLE"] ?? process.env["DEEPLAKE_FACTS_TABLE"] ?? memoryDerived.factsTable ?? sessionsDerived.factsTable ?? "memory_facts", + entitiesTable: process.env["HIVEMIND_ENTITIES_TABLE"] ?? process.env["DEEPLAKE_ENTITIES_TABLE"] ?? memoryDerived.entitiesTable ?? sessionsDerived.entitiesTable ?? "memory_entities", + factEntityLinksTable: process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? process.env["DEEPLAKE_FACT_ENTITY_LINKS_TABLE"] ?? memoryDerived.factEntityLinksTable ?? sessionsDerived.factEntityLinksTable ?? "fact_entity_links", + }; +} + +function getInterceptedSqlRefs(): Set { + if (isFactsSessionsOnlyPsqlMode()) { + return new Set([ + "sessions", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.sessions", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links", + ]); + } + return new Set([ + "memory", + "sessions", + "graph_nodes", + "graph_edges", + "memory_facts", + "memory_entities", + "fact_entity_links", + "hivemind.memory", + "hivemind.sessions", + "hivemind.graph_nodes", + "hivemind.graph_edges", + "hivemind.memory_facts", + "hivemind.memory_entities", + "hivemind.fact_entity_links", + ]); +} function extractSqlTableRefs(query: string): string[] { const refs: string[] = []; @@ -297,23 +401,21 @@ function extractSqlTableRefs(query: string): string[] { } export function queryReferencesInterceptedTables(query: string): boolean { - return extractSqlTableRefs(query).some((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return extractSqlTableRefs(query).some((ref) => interceptedRefs.has(ref)); } export function queryUsesOnlyInterceptedTables(query: string): boolean { const refs = extractSqlTableRefs(query); - return refs.length > 0 && refs.every((ref) => INTERCEPTED_SQL_REFS.has(ref)); + const interceptedRefs = getInterceptedSqlRefs(); + return refs.length > 0 && refs.every((ref) => 
interceptedRefs.has(ref)); } export function queryUsesBareMemoryTables(query: string): boolean { - return extractSqlTableRefs(query).some((ref) => - ref === "memory" || - ref === "sessions" || - ref === "graph_nodes" || - ref === "graph_edges" || - ref === "memory_facts" || - ref === "memory_entities" || - ref === "fact_entity_links"); + const bareRefs = isFactsSessionsOnlyPsqlMode() + ? new Set(["sessions", "memory_facts", "memory_entities", "fact_entity_links"]) + : new Set(["memory", "sessions", "graph_nodes", "graph_edges", "memory_facts", "memory_entities", "fact_entity_links"]); + return extractSqlTableRefs(query).some((ref) => bareRefs.has(ref)); } function parsePsqlSegment(pipeline: string[], tokens: string[]): CompiledSegment | null { @@ -363,42 +465,45 @@ function normalizePsqlQuery( query: string, memoryTable: string, sessionsTable: string, - graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", - graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? "graph_edges", - factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", - entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", - factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? 
"fact_entity_links", + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, ): string { let sql = query.trim().replace(/;+\s*$/, ""); sql = sql - .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) - .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) .replace(/\bFROM\s+"?sessions"?\b/gi, `FROM "${sessionsTable}"`) .replace(/\bJOIN\s+"?sessions"?\b/gi, `JOIN "${sessionsTable}"`) - .replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) - .replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) - .replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) - .replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) .replace(/\bFROM\s+"?memory_facts"?\b/gi, `FROM "${factsTable}"`) .replace(/\bJOIN\s+"?memory_facts"?\b/gi, `JOIN "${factsTable}"`) .replace(/\bFROM\s+"?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) .replace(/\bJOIN\s+"?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) .replace(/\bFROM\s+"?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) .replace(/\bJOIN\s+"?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`) - .replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) - .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?sessions"?\b/gi, `FROM "${sessionsTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?sessions"?\b/gi, `JOIN "${sessionsTable}"`) - .replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) - .replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN 
"${graphNodesTable}"`) - .replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) - .replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?memory_facts"?\b/gi, `FROM "${factsTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?memory_facts"?\b/gi, `JOIN "${factsTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?memory_entities"?\b/gi, `FROM "${entitiesTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?memory_entities"?\b/gi, `JOIN "${entitiesTable}"`) .replace(/\bFROM\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `FROM "${factEntityLinksTable}"`) .replace(/\bJOIN\s+"?hivemind"?\."?fact_entity_links"?\b/gi, `JOIN "${factEntityLinksTable}"`); + if (!isFactsSessionsOnlyPsqlMode()) { + sql = sql + .replace(/\bFROM\s+"?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + .replace(/\bJOIN\s+"?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?memory"?\b/gi, `FROM "${memoryTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?memory"?\b/gi, `JOIN "${memoryTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_nodes"?\b/gi, `FROM "${graphNodesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?graph_nodes"?\b/gi, `JOIN "${graphNodesTable}"`) + .replace(/\bFROM\s+"?hivemind"?\."?graph_edges"?\b/gi, `FROM "${graphEdgesTable}"`) + .replace(/\bJOIN\s+"?hivemind"?\."?graph_edges"?\b/gi, `JOIN "${graphEdgesTable}"`); + } return sql; } @@ -406,13 +511,16 @@ function validatePsqlQuery( query: string, memoryTable: string, sessionsTable: string, - graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes", - graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges", - factsTable = process.env["HIVEMIND_FACTS_TABLE"] ?? "memory_facts", - entitiesTable = process.env["HIVEMIND_ENTITIES_TABLE"] ?? "memory_entities", - factEntityLinksTable = process.env["HIVEMIND_FACT_ENTITY_LINKS_TABLE"] ?? "fact_entity_links", + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, ): string { if (!queryUsesOnlyInterceptedTables(query)) { + if (isFactsSessionsOnlyPsqlMode()) { + throw new Error("psql queries must reference only sessions, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); + } throw new Error("psql queries must reference only memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, fact_entity_links, or their hivemind.* aliases"); } const sql = normalizePsqlQuery( @@ -430,14 +538,16 @@ function validatePsqlQuery( throw new Error("psql mode only supports SELECT queries"); } const allowedTables = new Set([ - memoryTable, sessionsTable, - graphNodesTable, - graphEdgesTable, factsTable, entitiesTable, factEntityLinksTable, ]); + if (!isFactsSessionsOnlyPsqlMode()) { + allowedTables.add(memoryTable); + allowedTables.add(graphNodesTable); + allowedTables.add(graphEdgesTable); + } const tableMatches = [...compact.matchAll(/\b(?:from|join)\s+"?([a-zA-Z_][a-zA-Z0-9_]*)"?/gi)]; if (tableMatches.length === 0) { throw new Error("psql query must reference an intercepted hivemind memory table"); @@ -495,6 +605,13 @@ interface GraphCandidateRow extends VirtualRow { search_text?: string; } +interface HybridCandidate { + sessionId: string; + sourcePath: string; + score: number; + 
signals: Set; +} + function escapeRegex(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -587,6 +704,183 @@ async function fetchGraphCandidates( return expanded; } +function splitDelimitedField(value: unknown): string[] { + if (typeof value !== "string") return []; + return value + .split(",") + .map((item) => item.trim()) + .filter(Boolean); +} + +function extractSessionIdFromPath(value: string): string { + return value.match(/(conv_\d+_session_\d+)/)?.[1] ?? ""; +} + +function extractSummarySourcePath(summary: string): string { + return summary.match(/^- \*\*Source\*\*: (.+)$/m)?.[1]?.trim() ?? ""; +} + +function addHybridCandidate( + map: Map, + candidate: { sessionId?: string; sourcePath?: string; score: number; signal: string }, +): void { + const sessionId = candidate.sessionId?.trim() ?? ""; + const sourcePath = candidate.sourcePath?.trim() ?? ""; + if (!sessionId && !sourcePath) return; + const key = `${sessionId}@@${sourcePath}`; + const existing = map.get(key); + if (existing) { + existing.score += candidate.score; + existing.signals.add(candidate.signal); + return; + } + map.set(key, { + sessionId, + sourcePath, + score: candidate.score, + signals: new Set([candidate.signal]), + }); +} + +async function fetchEntityResolution( + api: DeeplakeApi, + entitiesTable: string, + terms: string[], +): Promise<{ entityIds: string[]; candidates: HybridCandidate[] }> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return { entityIds: [], candidates: [] }; + const entityTerms = chooseEntityTerms(filteredTerms); + if (entityTerms.length === 0) return { entityIds: [], candidates: [] }; + + const phrase = sqlStr(filteredTerms.join(" ")); + const where = entityTerms + .map((term) => `(canonical_name ILIKE '%${sqlLike(term)}%' OR aliases ILIKE '%${sqlLike(term)}%')`) + .join(" OR "); + const sql = + `SELECT entity_id, source_session_ids, source_paths, 
search_text, search_text <#> '${phrase}' AS score` + + ` FROM "${entitiesTable}"` + + ` WHERE ${where}` + + ` ORDER BY score ASC` + + ` LIMIT 8`; + + const rows = await api.query(sql); + const entityIds: string[] = []; + const candidateMap = new Map(); + for (const row of rows) { + const entityId = typeof row["entity_id"] === "string" ? row["entity_id"] : ""; + if (entityId && !entityIds.includes(entityId)) entityIds.push(entityId); + const sessionIds = splitDelimitedField(row["source_session_ids"]); + const sourcePaths = splitDelimitedField(row["source_paths"]); + const maxLen = Math.max(sessionIds.length, sourcePaths.length); + for (let i = 0; i < maxLen; i++) { + const sourcePath = sourcePaths[i] || (sessionIds[i] ? `/sessions/${sessionIds[i]}.json` : ""); + const sessionId = sessionIds[i] || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.2, + signal: "entity", + }); + } + } + return { entityIds, candidates: [...candidateMap.values()] }; +} + +async function fetchFactCandidates( + api: DeeplakeApi, + factsTable: string, + terms: string[], + entityIds: string[], +): Promise<{ entityIds: string[]; candidates: HybridCandidate[] }> { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0 && entityIds.length === 0) return { entityIds: [], candidates: [] }; + const phrase = sqlStr(filteredTerms.join(" ")); + const entityTerms = chooseEntityTerms(filteredTerms); + const topicTerms = filteredTerms.filter((term) => !entityTerms.includes(term)); + const topicClauses = (topicTerms.length > 0 ? topicTerms : filteredTerms) + .map((term) => `(predicate ILIKE '%${sqlLike(term)}%' OR object_name ILIKE '%${sqlLike(term)}%' OR summary ILIKE '%${sqlLike(term)}%' OR search_text ILIKE '%${sqlLike(term)}%')`); + const entityFilter = entityIds.length > 0 + ? 
`(subject_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}) OR object_entity_id IN (${entityIds.map((id) => `'${sqlStr(id)}'`).join(", ")}))` + : ""; + const whereParts = [ + entityFilter, + topicClauses.length > 0 ? `(${topicClauses.join(" OR ")})` : "", + ].filter(Boolean); + if (whereParts.length === 0) return { entityIds: [], candidates: [] }; + + const sql = + `SELECT source_session_id, source_path, subject_entity_id, object_entity_id, search_text <#> '${phrase}' AS score` + + ` FROM "${factsTable}"` + + ` WHERE ${whereParts.join(" AND ")}` + + ` ORDER BY score ASC` + + ` LIMIT 16`; + + const rows = await api.query(sql); + const relatedEntityIds: string[] = []; + const candidateMap = new Map(); + for (const row of rows) { + for (const key of ["subject_entity_id", "object_entity_id"] as const) { + const value = typeof row[key] === "string" ? row[key] : ""; + if (value && !relatedEntityIds.includes(value)) relatedEntityIds.push(value); + } + const sourcePath = typeof row["source_path"] === "string" ? row["source_path"] : ""; + const sessionId = typeof row["source_session_id"] === "string" ? 
row["source_session_id"] : extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 2.6, + signal: "fact", + }); + } + return { entityIds: relatedEntityIds, candidates: [...candidateMap.values()] }; +} + +async function fetchSummaryCandidates( + api: DeeplakeApi, + memoryTable: string, + terms: string[], +): Promise { + const filteredTerms = [...new Set(terms.map((term) => term.trim()).filter(Boolean))].slice(0, 4); + if (filteredTerms.length === 0) return []; + const retrievalMode = getGrepRetrievalMode(); + const phrase = filteredTerms.join(" "); + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + let sql: string; + + if (retrievalMode === "embedding" || retrievalMode === "hybrid") { + const embedder = getSummaryRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([phrase]); + if (!queryEmbedding) return []; + const queryVectorSql = sqlFloat4Array(queryEmbedding); + sql = retrievalMode === "hybrid" + ? `SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` + : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + } else { + const phraseSql = sqlStr(phrase); + sql = + `SELECT path, summary, summary <#> '${phraseSql}' AS score` + + ` FROM "${memoryTable}"` + + ` WHERE ${clauses.join(" OR ")}` + + ` ORDER BY score ASC` + + ` LIMIT 8`; + } + const rows = await api.query(sql); + const candidateMap = new Map(); + for (const row of rows) { + const path = typeof row["path"] === "string" ? 
row["path"] : ""; + const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? `/sessions/${extractSessionIdFromPath(path)}.json` : ""); + const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); + addHybridCandidate(candidateMap, { + sessionId, + sourcePath, + score: 1.6, + signal: "summary", + }); + } + return [...candidateMap.values()]; +} + function prependCtes(sql: string, ctes: string[]): string { if (ctes.length === 0) return sql; if (/^with\b/i.test(sql)) { @@ -597,23 +891,49 @@ function prependCtes(sql: string, ctes: string[]): string { function rewriteQueryWithRestrictedTables( sql: string, - memoryTable: string, - sessionsTable: string, - restrictedMemoryAlias: string | null, - restrictedSessionsAlias: string | null, + aliases: { + memoryTable: string; + sessionsTable: string; + factsTable: string; + entitiesTable: string; + factEntityLinksTable: string; + restrictedMemoryAlias: string | null; + restrictedSessionsAlias: string | null; + restrictedFactsAlias: string | null; + restrictedEntitiesAlias: string | null; + restrictedLinksAlias: string | null; + }, ): string { let rewritten = sql; - if (restrictedMemoryAlias) { - const memoryPattern = escapeRegex(memoryTable); + if (aliases.restrictedMemoryAlias) { + const memoryPattern = escapeRegex(aliases.memoryTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${aliases.restrictedMemoryAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${aliases.restrictedMemoryAlias}"`); + } + if (aliases.restrictedSessionsAlias) { + const sessionsPattern = escapeRegex(aliases.sessionsTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${aliases.restrictedSessionsAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN 
"${aliases.restrictedSessionsAlias}"`); + } + if (aliases.restrictedFactsAlias) { + const factsPattern = escapeRegex(aliases.factsTable); + rewritten = rewritten + .replace(new RegExp(`\\bFROM\\s+"?${factsPattern}"?`, "gi"), `FROM "${aliases.restrictedFactsAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${factsPattern}"?`, "gi"), `JOIN "${aliases.restrictedFactsAlias}"`); + } + if (aliases.restrictedEntitiesAlias) { + const entitiesPattern = escapeRegex(aliases.entitiesTable); rewritten = rewritten - .replace(new RegExp(`\\bFROM\\s+"?${memoryPattern}"?`, "gi"), `FROM "${restrictedMemoryAlias}"`) - .replace(new RegExp(`\\bJOIN\\s+"?${memoryPattern}"?`, "gi"), `JOIN "${restrictedMemoryAlias}"`); + .replace(new RegExp(`\\bFROM\\s+"?${entitiesPattern}"?`, "gi"), `FROM "${aliases.restrictedEntitiesAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${entitiesPattern}"?`, "gi"), `JOIN "${aliases.restrictedEntitiesAlias}"`); } - if (restrictedSessionsAlias) { - const sessionsPattern = escapeRegex(sessionsTable); + if (aliases.restrictedLinksAlias) { + const linksPattern = escapeRegex(aliases.factEntityLinksTable); rewritten = rewritten - .replace(new RegExp(`\\bFROM\\s+"?${sessionsPattern}"?`, "gi"), `FROM "${restrictedSessionsAlias}"`) - .replace(new RegExp(`\\bJOIN\\s+"?${sessionsPattern}"?`, "gi"), `JOIN "${restrictedSessionsAlias}"`); + .replace(new RegExp(`\\bFROM\\s+"?${linksPattern}"?`, "gi"), `FROM "${aliases.restrictedLinksAlias}"`) + .replace(new RegExp(`\\bJOIN\\s+"?${linksPattern}"?`, "gi"), `JOIN "${aliases.restrictedLinksAlias}"`); } return rewritten; } @@ -623,22 +943,53 @@ async function applyGraphRestrictionsToPsqlQuery( sql: string, memoryTable: string, sessionsTable: string, - graphNodesTable: string, - graphEdgesTable: string, + graphNodesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphNodesTable, + graphEdgesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).graphEdgesTable, + factsTable = 
resolveInterceptedTableNames(memoryTable, sessionsTable).factsTable, + entitiesTable = resolveInterceptedTableNames(memoryTable, sessionsTable).entitiesTable, + factEntityLinksTable = resolveInterceptedTableNames(memoryTable, sessionsTable).factEntityLinksTable, ): Promise { + if (isFactsSessionsOnlyPsqlMode()) { + return sql; + } if (extractSqlTableRefs(sql).some((ref) => ref === normalizeSqlRef(graphNodesTable) || ref === normalizeSqlRef(graphEdgesTable))) { return sql; } const refs = extractSqlTableRefs(sql); const touchesMemory = refs.some((ref) => ref === normalizeSqlRef(memoryTable)); const touchesSessions = refs.some((ref) => ref === normalizeSqlRef(sessionsTable)); - if (!touchesMemory && !touchesSessions) return sql; + const touchesFacts = refs.some((ref) => ref === normalizeSqlRef(factsTable)); + const touchesEntities = refs.some((ref) => ref === normalizeSqlRef(entitiesTable)); + const touchesLinks = refs.some((ref) => ref === normalizeSqlRef(factEntityLinksTable)); + if (!touchesMemory && !touchesSessions && !touchesFacts && !touchesEntities && !touchesLinks) return sql; const terms = extractSqlSearchTerms(sql); if (terms.length === 0) return sql; - const candidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); - if (candidates.length === 0 || candidates.length > 16) return sql; + const candidateMap = new Map(); + const graphCandidates = await fetchGraphCandidates(api, graphNodesTable, graphEdgesTable, terms); + for (const candidate of graphCandidates) { + addHybridCandidate(candidateMap, { ...candidate, score: 2.0, signal: "graph" }); + } + const entityResolution = await fetchEntityResolution(api, entitiesTable, terms); + for (const candidate of entityResolution.candidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "entity" }); + } + const factCandidates = await fetchFactCandidates(api, factsTable, terms, entityResolution.entityIds); + for (const candidate of factCandidates.candidates) { + 
addHybridCandidate(candidateMap, { ...candidate, signal: "fact" }); + } + const summaryCandidates = await fetchSummaryCandidates(api, memoryTable, terms); + for (const candidate of summaryCandidates) { + addHybridCandidate(candidateMap, { ...candidate, signal: "summary" }); + } + + const candidateEntityIds = [...new Set([...entityResolution.entityIds, ...factCandidates.entityIds])].slice(0, 12); + const candidates = [...candidateMap.values()] + .sort((a, b) => b.score - a.score || b.signals.size - a.signals.size) + .slice(0, 12); + if (candidates.length === 0) return sql; + if (candidates.length > 16) return sql; const values = candidates.map((candidate) => `('${sqlStr(candidate.sessionId)}', '${sqlStr(candidate.sourcePath)}')` @@ -648,6 +999,15 @@ async function applyGraphRestrictionsToPsqlQuery( ]; let restrictedMemoryAlias: string | null = null; let restrictedSessionsAlias: string | null = null; + let restrictedFactsAlias: string | null = null; + let restrictedEntitiesAlias: string | null = null; + let restrictedLinksAlias: string | null = null; + + if (candidateEntityIds.length > 0) { + ctes.push( + `__hm_entity_candidates(entity_id) AS (VALUES ${candidateEntityIds.map((entityId) => `('${sqlStr(entityId)}')`).join(", ")})`, + ); + } if (touchesMemory) { restrictedMemoryAlias = "__hm_memory"; @@ -671,9 +1031,63 @@ async function applyGraphRestrictionsToPsqlQuery( `)` ); } + if (touchesFacts) { + restrictedFactsAlias = "__hm_memory_facts"; + ctes.push( + `"${restrictedFactsAlias}" AS (` + + ` SELECT * FROM "${factsTable}" f` + + ` WHERE (` + + ` f.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + ` OR f.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + + (candidateEntityIds.length > 0 + ? 
` OR f.subject_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + + ` OR f.object_entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + : "") + + ` )` + + `)` + ); + } + if (touchesEntities && candidateEntityIds.length > 0) { + restrictedEntitiesAlias = "__hm_memory_entities"; + ctes.push( + `"${restrictedEntitiesAlias}" AS (` + + ` SELECT * FROM "${entitiesTable}" e` + + ` WHERE e.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + + `)` + ); + } + if (touchesLinks) { + restrictedLinksAlias = "__hm_fact_entity_links"; + ctes.push( + `"${restrictedLinksAlias}" AS (` + + ` SELECT * FROM "${factEntityLinksTable}" l` + + ` WHERE (` + + ` l.source_path IN (SELECT source_path FROM __hm_graph_candidates WHERE source_path <> '')` + + ` OR l.source_session_id IN (SELECT source_session_id FROM __hm_graph_candidates WHERE source_session_id <> '')` + + (candidateEntityIds.length > 0 + ? ` OR l.entity_id IN (SELECT entity_id FROM __hm_entity_candidates)` + : "") + + (touchesFacts + ? ` OR l.fact_id IN (SELECT fact_id FROM "__hm_memory_facts")` + : "") + + ` )` + + `)` + ); + } return prependCtes( - rewriteQueryWithRestrictedTables(sql, memoryTable, sessionsTable, restrictedMemoryAlias, restrictedSessionsAlias), + rewriteQueryWithRestrictedTables(sql, { + memoryTable, + sessionsTable, + factsTable, + entitiesTable, + factEntityLinksTable, + restrictedMemoryAlias, + restrictedSessionsAlias, + restrictedFactsAlias, + restrictedEntitiesAlias, + restrictedLinksAlias, + }), ctes, ); } @@ -1011,8 +1425,10 @@ export async function executeCompiledBashCommand( } if (segment.kind === "psql") { - const graphNodesTable = process.env["HIVEMIND_GRAPH_NODES_TABLE"] ?? "graph_nodes"; - const graphEdgesTable = process.env["HIVEMIND_GRAPH_EDGES_TABLE"] ?? 
"graph_edges"; + const { + graphNodesTable, + graphEdgesTable, + } = resolveInterceptedTableNames(memoryTable, sessionsTable); const validated = validatePsqlQuery(segment.query, memoryTable, sessionsTable, graphNodesTable, graphEdgesTable); const prepared = await applyGraphRestrictionsToPsqlQuery( api, diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index f72fcce..2a537fb 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -14,7 +14,7 @@ import { loadCredentials } from "../../commands/auth.js"; import { readStdin } from "../../utils/stdin.js"; import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; -import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; import { getInstalledVersion } from "../version-check.js"; const log = (msg: string) => _log("codex-session-start", msg); @@ -147,6 +147,55 @@ Answer rules: Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. For normal recall, query memory_facts for distilled claims, memory_entities for canonical names, and sessions for exact grounding; graph-based restriction is applied automatically where relevant. Do NOT use python, python3, node, curl, or filesystem paths for recall in this mode.`; +export const CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: Use SQL only for Deeplake recall tasks. 
+ +Available tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +Workflow: +1. Resolve the named person, project, place, or organization with memory_entities. +2. Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Use sessions for transcript grounding and final answer verification. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. Facts are for narrowing and aggregation; sessions are for the final exact answer. 
+ +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by entity: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT replace an exact status or self-label with a broader biography. +- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. 
+- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row. + +Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode.`; + export interface CodexSessionStartInput { session_id: string; transcript_path?: string | null; @@ -163,7 +212,9 @@ export function buildCodexSessionStartContext(args: { }): string { const versionNotice = args.currentVersion ? `\nHivemind v${args.currentVersion}` : ""; const template = isPsqlMode() - ? CODEX_SESSION_START_CONTEXT_PSQL + ? isFactsSessionsOnlyPsqlMode() + ? CODEX_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY + : CODEX_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CODEX_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() diff --git a/src/hooks/codex/wiki-worker.ts b/src/hooks/codex/wiki-worker.ts index 7f4fbda..c756b0f 100644 --- a/src/hooks/codex/wiki-worker.ts +++ b/src/hooks/codex/wiki-worker.ts @@ -18,6 +18,7 @@ import { replaceSessionGraph, } from "../knowledge-graph.js"; import { + buildMemoryFactTranscript, buildMemoryFactPrompt, parseMemoryFactExtraction, replaceSessionFacts, @@ -113,7 +114,7 @@ async function main(): Promise { // 1. 
Fetch session events from sessions table wlog("fetching session events"); const rows = await query( - `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + + `SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); @@ -232,8 +233,17 @@ async function main(): Promise { } try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? row["creation_date"] : "", + }))); const factPrompt = buildMemoryFactPrompt({ - summaryText: text, + transcriptText, sessionId: cfg.sessionId, sourcePath: jsonlServerPath, project: cfg.project, diff --git a/src/hooks/memory-facts.ts b/src/hooks/memory-facts.ts index 7b0a30c..8070e2d 100644 --- a/src/hooks/memory-facts.ts +++ b/src/hooks/memory-facts.ts @@ -43,25 +43,38 @@ export interface ReplaceSessionFactsResult { links: number; } -export const MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from a session summary. +export interface SessionFactTranscriptRow { + turnIndex: number; + eventType?: string; + speaker?: string; + text?: string; + turnSummary?: string; + sourceDateTime?: string; + creationDate?: string; +} + +export const MEMORY_FACT_PROMPT_TEMPLATE = `You are extracting durable long-term memory facts from raw session transcript rows. 
SESSION ID: __SESSION_ID__ SOURCE PATH: __SOURCE_PATH__ PROJECT: __PROJECT__ -SUMMARY MARKDOWN: -__SUMMARY_TEXT__ +TRANSCRIPT ROWS: +__TRANSCRIPT_TEXT__ Return ONLY valid JSON with this exact shape: {"facts":[{"subject":"canonical entity","subject_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","subject_aliases":["optional alias"],"predicate":"snake_case_relation","object":"canonical object text","object_type":"person|organization|place|artifact|project|tool|file|event|goal|status|preference|concept|other","object_aliases":["optional alias"],"summary":"short factual claim","evidence":"short supporting phrase","confidence":0.0,"valid_at":"optional date/time text","valid_from":"optional date/time text","valid_to":"optional date/time text"}]} Rules: +- The transcript rows are the only source of truth for this extraction. Do not rely on summaries or inferred rewrites. - Extract atomic facts that are useful for later recall. One durable claim per fact. - Prefer canonical names for repeated people, organizations, places, projects, tools, and artifacts. - Use relation-style predicates such as works_on, home_country, relationship_status, prefers, plans, decided_to_pursue, located_in, uses_tool, recommended, supports, owns, read, attends, moved_from, moved_to. -- Facts should preserve temporal history instead of overwriting it. If the summary says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the summary supports it. -- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the summary. -- Do not invent facts that are not supported by the summary. +- Facts should preserve temporal history instead of overwriting it. If the transcript says something changed, emit the new fact and include timing in valid_at / valid_from / valid_to when the transcript supports it. 
+- Include assistant-confirmed or tool-confirmed actions when they are stated as completed facts in the transcript. +- If a speaker explicitly self-identifies or states a status, preserve that exact label instead of broadening it. +- Preserve exact named places, titles, organizations, and relative time phrases when they are the stated fact. +- Do not invent facts that are not supported by the transcript. - Avoid duplicates or near-duplicates. If two facts say the same thing, keep the more specific one. - Return no markdown, no prose, no code fences, only JSON.`; @@ -330,15 +343,43 @@ export function parseMemoryFactExtraction(raw: string): MemoryFactExtraction { }; } +export function buildMemoryFactTranscript(rows: SessionFactTranscriptRow[]): string { + const normalized = rows + .map((row) => ({ + turnIndex: Number.isFinite(row.turnIndex) ? row.turnIndex : 0, + speaker: normalizeString(row.speaker), + text: normalizeString(row.text), + eventType: normalizeString(row.eventType) || "message", + turnSummary: normalizeString(row.turnSummary), + sourceDateTime: normalizeString(row.sourceDateTime) || normalizeString(row.creationDate), + })) + .filter((row) => row.text || row.turnSummary); + + if (normalized.length === 0) return "(no transcript rows)"; + + return normalized.map((row) => { + const prefix = [ + `turn=${row.turnIndex}`, + row.sourceDateTime ? `time=${row.sourceDateTime}` : "", + row.speaker ? `speaker=${row.speaker}` : `event=${row.eventType}`, + ].filter(Boolean).join(" | "); + const lines = [`[${prefix}] ${row.text || row.turnSummary}`]; + if (row.turnSummary && row.turnSummary !== row.text) { + lines.push(`summary: ${row.turnSummary}`); + } + return lines.join("\n"); + }).join("\n"); +} + export function buildMemoryFactPrompt(args: { - summaryText: string; + transcriptText: string; sessionId: string; sourcePath: string; project: string; template?: string; }): string { return (args.template ?? 
MEMORY_FACT_PROMPT_TEMPLATE) - .replace(/__SUMMARY_TEXT__/g, args.summaryText) + .replace(/__TRANSCRIPT_TEXT__/g, args.transcriptText) .replace(/__SESSION_ID__/g, args.sessionId) .replace(/__SOURCE_PATH__/g, args.sourcePath) .replace(/__PROJECT__/g, args.project); diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index 752d059..aecf25f 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -15,7 +15,7 @@ import { loadCredentials, saveCredentials } from "../commands/auth.js"; import { readStdin } from "../utils/stdin.js"; import { log as _log } from "../utils/debug.js"; import { isDirectRun } from "../utils/direct-run.js"; -import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; import { DEFAULT_VERSION_CACHE_TTL_MS, getInstalledVersion, @@ -221,6 +221,61 @@ IMPORTANT: Only psql SELECT queries over memory, sessions, graph_nodes, graph_ed Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; +export const CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY = `DEEPLAKE MEMORY SQL MODE: For this run, use SQL only when answering recall questions. + +Available Deeplake tables: +- sessions(path, creation_date, turn_index, event_type, dia_id, speaker, text, turn_summary, source_date_time, message) +- memory_facts(path, fact_id, subject_entity_id, subject_name, subject_type, predicate, object_entity_id, object_name, object_type, summary, evidence, search_text, confidence, valid_at, valid_from, valid_to, source_session_id, source_path) +- memory_entities(path, entity_id, canonical_name, entity_type, aliases, summary, search_text, source_session_ids, source_paths) +- fact_entity_links(path, link_id, fact_id, entity_id, entity_role, source_session_id, source_path) + +The summary and graph tables are intentionally unavailable in this mode. 
Treat them as if they do not exist. + +Use this command shape: +- psql -At -F '|' -c "SELECT ..." + +SQL strategy: +1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. +2. Expand connected facts through fact_entity_links and memory_facts. +3. Use memory_facts to identify the small set of likely source sessions. +4. Ground every exact answer on sessions rows from those source sessions. +5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. +6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. +7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. +8. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +9. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. 
+ +Good query patterns: +- Canonical entity lookup: + psql -At -F '|' -c "SELECT entity_id, canonical_name, entity_type, aliases, summary FROM memory_entities WHERE canonical_name ILIKE '%%' OR aliases ILIKE '%%' LIMIT 5" +- Fact lookup by name/topic: + psql -At -F '|' -c "SELECT fact_id, subject_name, predicate, object_name, summary, valid_at, valid_from, valid_to, source_session_id, source_path FROM memory_facts WHERE subject_name ILIKE '%%' AND (predicate ILIKE '%%' OR object_name ILIKE '%%') ORDER BY creation_date DESC LIMIT 10" +- Entity-linked fact expansion: + psql -At -F '|' -c "SELECT f.fact_id, f.subject_name, f.predicate, f.object_name, f.summary, f.source_session_id, f.source_path FROM fact_entity_links l JOIN memory_facts f ON f.fact_id = l.fact_id WHERE l.entity_id = '' ORDER BY f.creation_date DESC LIMIT 10" +- Transcript grounding by exact path: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') ORDER BY path ASC, turn_index ASC" +- Transcript search inside known sessions: + psql -At -F '|' -c "SELECT path, creation_date, turn_index, speaker, text, source_date_time FROM sessions WHERE path IN ('/sessions/...', '/sessions/...') AND (speaker ILIKE '%%' OR text ILIKE '%%') ORDER BY path ASC, turn_index ASC" + +Avoid these mistakes: +- Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. +- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. +- Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. +- Do NOT replace an exact status or self-label with a broader biography. 
+- Do NOT recalculate a relative-time answer against today's date when the stored phrase already answers the question. + +Answer rules: +- Return the smallest exact answer supported by the data. +- Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. +- Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person. +- For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. +- For list or profile questions, aggregate across the small set of candidate sessions before answering. + +IMPORTANT: Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in this mode. Do NOT use python, python3, node, curl, filesystem paths, memory, or graph tables for recall in this mode. + +Debugging: Set HIVEMIND_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; + const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; export function buildSessionStartAdditionalContext(args: { @@ -230,7 +285,9 @@ export function buildSessionStartAdditionalContext(args: { latestVersion: string | null; }): string { const template = isPsqlMode() - ? CLAUDE_SESSION_START_CONTEXT_PSQL + ? isFactsSessionsOnlyPsqlMode() + ? CLAUDE_SESSION_START_CONTEXT_PSQL_FACTS_SESSIONS_ONLY + : CLAUDE_SESSION_START_CONTEXT_PSQL : isSessionsOnlyMode() ? CLAUDE_SESSION_START_CONTEXT_SESSIONS_ONLY : isIndexDisabled() diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 06f3fae..636fad1 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -19,6 +19,7 @@ import { replaceSessionGraph, } from "./knowledge-graph.js"; import { + buildMemoryFactTranscript, buildMemoryFactPrompt, parseMemoryFactExtraction, replaceSessionFacts, @@ -116,7 +117,7 @@ async function main(): Promise { // 1. 
Fetch session events from sessions table, reconstruct JSONL wlog("fetching session events"); const rows = await query( - `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + + `SELECT path, message, creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC, turn_index ASC` ); @@ -239,8 +240,17 @@ async function main(): Promise { } try { + const transcriptText = buildMemoryFactTranscript(rows.map((row) => ({ + turnIndex: Number(row["turn_index"] ?? 0), + eventType: typeof row["event_type"] === "string" ? row["event_type"] : "", + speaker: typeof row["speaker"] === "string" ? row["speaker"] : "", + text: typeof row["text"] === "string" ? row["text"] : "", + turnSummary: typeof row["turn_summary"] === "string" ? row["turn_summary"] : "", + sourceDateTime: typeof row["source_date_time"] === "string" ? row["source_date_time"] : "", + creationDate: typeof row["creation_date"] === "string" ? row["creation_date"] : "", + }))); const factPrompt = buildMemoryFactPrompt({ - summaryText: text, + transcriptText, sessionId: cfg.sessionId, sourcePath: jsonlServerPath, project: cfg.project, diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 8e2cae4..0c483b6 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -16,14 +16,66 @@ */ import type { DeeplakeApi } from "../deeplake-api.js"; +import { HarrierEmbedder } from "../embeddings/harrier.js"; import { sqlStr, sqlLike } from "../utils/sql.js"; -import { isSessionsOnlyMode, isSummaryBm25Disabled } from "../utils/retrieval-mode.js"; +import { getGrepRetrievalMode, isSessionsOnlyMode, isSummaryBm25Disabled } from "../utils/retrieval-mode.js"; const DEFAULT_GREP_CANDIDATE_LIMIT = Number( process.env["HIVEMIND_GREP_LIMIT"] ?? process.env["DEEPLAKE_GREP_LIMIT"] ?? 
500, ); +const DEFAULT_EMBED_RETRIEVAL_MODEL_ID = "onnx-community/harrier-oss-v1-270m-ONNX"; +const DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7; +const DEFAULT_HYBRID_TEXT_WEIGHT = 0.3; + +let retrievalEmbedder: HarrierEmbedder | null = null; + +function envString(...names: string[]): string | undefined { + for (const name of names) { + const value = process.env[name]?.trim(); + if (value) return value; + } + return undefined; +} + +function envFlag(...names: string[]): boolean { + const raw = envString(...names) ?? ""; + return /^(1|true|yes|on)$/i.test(raw); +} + +function envNumber(fallback: number, ...names: string[]): number { + const raw = envString(...names); + const parsed = Number(raw); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function getRetrievalEmbedder(): HarrierEmbedder { + if (!retrievalEmbedder) { + retrievalEmbedder = new HarrierEmbedder({ + modelId: envString( + "HIVEMIND_EMBED_RETRIEVAL_MODEL_ID", + "DEEPLAKE_EMBED_RETRIEVAL_MODEL_ID", + "HIVEMIND_HARRIER_MODEL_ID", + "DEEPLAKE_HARRIER_MODEL_ID", + ) ?? DEFAULT_EMBED_RETRIEVAL_MODEL_ID, + device: envString("HIVEMIND_EMBED_RETRIEVAL_DEVICE", "DEEPLAKE_EMBED_RETRIEVAL_DEVICE") ?? 
"cpu", + dtype: envString("HIVEMIND_EMBED_RETRIEVAL_DTYPE", "DEEPLAKE_EMBED_RETRIEVAL_DTYPE"), + cacheDir: envString("HIVEMIND_EMBED_RETRIEVAL_CACHE_DIR", "DEEPLAKE_EMBED_RETRIEVAL_CACHE_DIR"), + localModelPath: envString("HIVEMIND_EMBED_RETRIEVAL_LOCAL_MODEL_PATH", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_MODEL_PATH"), + localFilesOnly: envFlag("HIVEMIND_EMBED_RETRIEVAL_LOCAL_FILES_ONLY", "DEEPLAKE_EMBED_RETRIEVAL_LOCAL_FILES_ONLY"), + }); + } + return retrievalEmbedder; +} + +function sqlFloat4Array(values: number[]): string { + if (values.length === 0) throw new Error("Query embedding is empty"); + return `ARRAY[${values.map((value) => { + if (!Number.isFinite(value)) throw new Error("Query embedding contains non-finite values"); + return Math.fround(value).toString(); + }).join(", ")}]::float4[]`; +} // ── Types ──────────────────────────────────────────────────────────────────── @@ -58,6 +110,8 @@ export interface SearchOptions { prefilterPattern?: string; /** Optional safe literal alternation anchors for regex searches (e.g. foo|bar). */ prefilterPatterns?: string[]; + /** Optional semantic query text used for vector and hybrid retrieval. */ + queryText?: string; /** Optional lexical query text for BM25 summary retrieval. */ bm25QueryText?: string; /** Per-table row cap. */ @@ -270,7 +324,7 @@ export async function searchDeeplakeTables( sessionsTable: string, opts: SearchOptions, ): Promise { - const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, bm25QueryText } = opts; + const { pathFilter, contentScanOnly, likeOp, escapedPattern, regexPattern, prefilterPattern, prefilterPatterns, queryText, bm25QueryText } = opts; const limit = opts.limit ?? DEFAULT_GREP_CANDIDATE_LIMIT; const filterPatterns = contentScanOnly ? (prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : (prefilterPattern ? 
[prefilterPattern] : [])) @@ -288,7 +342,11 @@ export async function searchDeeplakeTables( const fallbackSessFilter = likeSessFilter; const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); - const useSummaryBm25 = !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); + const retrievalMode = getGrepRetrievalMode(); + const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; + const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; + const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = (api as DeeplakeApi & { @@ -309,6 +367,69 @@ export async function searchDeeplakeTables( : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; }; + if (useEmbeddingRetrieval || useHybridRetrieval) { + const embedder = getRetrievalEmbedder(); + const [queryEmbedding] = await embedder.embedQueries([semanticQueryText]); + if (!queryEmbedding) throw new Error("Failed to build query embedding"); + const queryVectorSql = sqlFloat4Array(queryEmbedding); + const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); + const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); + const buildSemanticCombinedQuery = (): string => { + const memQuery = useHybridRetrieval + ? 
buildHybridSimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + queryVectorSql, + semanticQueryText, + vectorWeight, + textWeight, + limit, + ) + : buildEmbeddingSimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + queryVectorSql, + limit, + ); + const sessQuery = useHybridRetrieval + ? buildHybridSimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + queryVectorSql, + semanticQueryText, + vectorWeight, + textWeight, + limit, + ) + : buildEmbeddingSimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + queryVectorSql, + limit, + ); + return sessionsOnly + ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` + : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; + }; + const rows = await api.query(buildSemanticCombinedQuery()); + return rows.map(row => ({ + path: String(row["path"]), + content: String(row["content"] ?? ""), + })); + } + const primaryMemFilter = useSummaryBm25 ? "" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); @@ -430,6 +551,7 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri const literalPrefilter = hasRegexMeta ? extractRegexLiteralPrefilter(normalizedPattern) : null; const alternationPrefilters = hasRegexMeta ? extractRegexAlternationPrefilters(normalizedPattern) : null; const bm25QueryText = buildSummaryBm25QueryText(normalizedPattern, params.fixedString, literalPrefilter, alternationPrefilters); + const queryText = (bm25QueryText ?? 
normalizedPattern.trim()) || undefined; const regexBase = params.fixedString ? escapeRegexLiteral(normalizedPattern) : normalizedPattern; const sqlRegexPattern = params.wordMatch ? `\\b(?:${regexBase})\\b` @@ -444,6 +566,7 @@ export function buildGrepSearchOptions(params: GrepMatchParams, targetPath: stri regexPattern: sqlRegexPattern, prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : undefined, prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)), + queryText, bm25QueryText: bm25QueryText ?? undefined, limit: DEFAULT_GREP_CANDIDATE_LIMIT, }; @@ -504,6 +627,33 @@ function buildSummaryBm25Query( return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } +function buildEmbeddingSimilarityQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryVectorSql: string, + limit: number, +): string { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; +} + +function buildHybridSimilarityQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryVectorSql: string, + queryText: string, + vectorWeight: number, + textWeight: number, + limit: number, +): string { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +} + export function 
toSqlRegexPattern( pattern: string, ignoreCase: boolean, diff --git a/src/tools/backfill-locomo-facts.ts b/src/tools/backfill-locomo-facts.ts index b45c55d..cd52611 100644 --- a/src/tools/backfill-locomo-facts.ts +++ b/src/tools/backfill-locomo-facts.ts @@ -7,6 +7,7 @@ import { promisify } from "node:util"; import { loadCredentials } from "../commands/auth.js"; import { DeeplakeApi, DeeplakeQueryError, summarizeSql } from "../deeplake-api.js"; import { + buildMemoryFactTranscript, buildMemoryFactPrompt, parseMemoryFactExtraction, replaceSessionFacts, @@ -17,6 +18,7 @@ const execFileAsync = promisify(execFile); interface Args { memoryTable: string; + sessionsTable: string; factsTable: string; entitiesTable: string; linksTable: string; @@ -30,7 +32,6 @@ interface Args { interface SummaryRow { path: string; - summary: string; project?: string; } @@ -38,6 +39,7 @@ function parseArgs(): Args { const args = process.argv.slice(2); const opts: Args = { memoryTable: "memory", + sessionsTable: "sessions", factsTable: "memory_facts", entitiesTable: "memory_entities", linksTable: "fact_entity_links", @@ -56,6 +58,9 @@ function parseArgs(): Args { case "--facts-table": opts.factsTable = args[++i] ?? opts.factsTable; break; + case "--sessions-table": + opts.sessionsTable = args[++i] ?? opts.sessionsTable; + break; case "--entities-table": opts.entitiesTable = args[++i] ?? opts.entitiesTable; break; @@ -85,14 +90,9 @@ function parseArgs(): Args { return opts; } -function extractSummarySourcePath(summary: string): string { - const match = summary.match(/^- \*\*Source\*\*: (.+)$/m); - return match?.[1]?.trim() || ""; -} - -function sessionIdFromSummaryPath(path: string): string { - const base = basename(path).replace(/\.md$/, ""); - return base.endsWith("_summary") ? 
base.slice(0, -"_summary".length) : base; +function sessionIdFromSessionPath(path: string): string { + const base = basename(path).replace(/\.jsonl?$/, ""); + return base; } function serializeError(error: unknown): Record { @@ -140,7 +140,7 @@ function appendErrorLog(logPath: string | undefined, payload: Record { await api.query(`DELETE FROM "${opts.entitiesTable}"`); } - const summaryRows = await api.query( - `SELECT path, summary, project FROM "${opts.memoryTable}" WHERE path LIKE '/summaries/locomo/%' ORDER BY path ASC`, + const sessionRows = await api.query( + `SELECT DISTINCT path, project FROM "${opts.sessionsTable}" WHERE path LIKE '/sessions/conv_%_session_%.json%' ORDER BY path ASC`, ); - const summaries: SummaryRow[] = summaryRows.map((row) => ({ + const summaries: SummaryRow[] = sessionRows.map((row) => ({ path: String(row["path"] ?? ""), - summary: String(row["summary"] ?? ""), project: row["project"] == null ? undefined : String(row["project"]), })) - .filter((row) => row.path && row.summary) + .filter((row) => row.path) .filter((row) => !opts.pathContains || row.path.includes(opts.pathContains)); const claudeBin = findClaudeBin(); @@ -217,11 +216,24 @@ async function main(): Promise { const index = nextIndex++; if (index >= summaries.length) return; const row = summaries[index]; - const sessionId = sessionIdFromSummaryPath(row.path); - const sourcePath = extractSummarySourcePath(row.summary) || `/sessions/${sessionId}.json`; + const sessionId = sessionIdFromSessionPath(row.path); + const sourcePath = row.path; try { + const transcriptRows = await api.query( + `SELECT creation_date, turn_index, event_type, speaker, text, turn_summary, source_date_time FROM "${opts.sessionsTable}" ` + + `WHERE path = '${row.path.replace(/'/g, "''")}' ORDER BY creation_date ASC, turn_index ASC`, + ); + const transcriptText = buildMemoryFactTranscript(transcriptRows.map((transcriptRow) => ({ + turnIndex: Number(transcriptRow["turn_index"] ?? 
0), + eventType: typeof transcriptRow["event_type"] === "string" ? transcriptRow["event_type"] : "", + speaker: typeof transcriptRow["speaker"] === "string" ? transcriptRow["speaker"] : "", + text: typeof transcriptRow["text"] === "string" ? transcriptRow["text"] : "", + turnSummary: typeof transcriptRow["turn_summary"] === "string" ? transcriptRow["turn_summary"] : "", + sourceDateTime: typeof transcriptRow["source_date_time"] === "string" ? transcriptRow["source_date_time"] : "", + creationDate: typeof transcriptRow["creation_date"] === "string" ? transcriptRow["creation_date"] : "", + }))); const extraction = await generateFacts( - row.summary, + transcriptText, sourcePath, sessionId, row.project || "locomo", @@ -259,7 +271,7 @@ async function main(): Promise { } await Promise.all(Array.from({ length: opts.concurrency }, () => worker())); - process.stdout.write(`Done. facts_summaries=${completed} failed=${failures} total_facts=${totalFacts}\n`); + process.stdout.write(`Done. facts_sessions=${completed} failed=${failures} total_facts=${totalFacts}\n`); } main().catch((error) => { diff --git a/src/utils/retrieval-mode.ts b/src/utils/retrieval-mode.ts index cc2639b..b3fcdfb 100644 --- a/src/utils/retrieval-mode.ts +++ b/src/utils/retrieval-mode.ts @@ -3,6 +3,14 @@ export function isSessionsOnlyMode(): boolean { return /^(1|true|yes|on)$/i.test(raw.trim()); } +export type GrepRetrievalMode = "classic" | "embedding" | "hybrid"; + +export function getGrepRetrievalMode(): GrepRetrievalMode { + const raw = (process.env["HIVEMIND_GREP_RETRIEVAL_MODE"] ?? process.env["DEEPLAKE_GREP_RETRIEVAL_MODE"] ?? "").trim().toLowerCase(); + if (raw === "embedding" || raw === "hybrid") return raw; + return "classic"; +} + export function isIndexDisabled(): boolean { const raw = process.env["HIVEMIND_DISABLE_INDEX"] ?? process.env["DEEPLAKE_DISABLE_INDEX"] ?? 
""; return /^(1|true|yes|on)$/i.test(raw.trim()); @@ -17,3 +25,8 @@ export function isPsqlMode(): boolean { const raw = process.env["HIVEMIND_PSQL_MODE"] ?? process.env["DEEPLAKE_PSQL_MODE"] ?? ""; return /^(1|true|yes|on)$/i.test(raw.trim()); } + +export function isFactsSessionsOnlyPsqlMode(): boolean { + const raw = process.env["HIVEMIND_PSQL_FACTS_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_PSQL_FACTS_SESSIONS_ONLY"] ?? ""; + return /^(1|true|yes|on)$/i.test(raw.trim()); +} From 356cbd80a91d50b4ece668b500b0d6fd1be166eb Mon Sep 17 00:00:00 2001 From: davitbun Date: Mon, 20 Apr 2026 22:52:01 -0700 Subject: [PATCH 6/7] recent changes --- claude-code/bundle/pre-tool-use.js | 31 ++++++++++++------ claude-code/tests/hooks-source.test.ts | 29 +++++++++++++++++ codex/bundle/pre-tool-use.js | 17 ++++++++-- src/hooks/codex/pre-tool-use.ts | 30 ++++++++++++++++-- src/hooks/pre-tool-use.ts | 44 ++++++++++++++++++++------ 5 files changed, 127 insertions(+), 24 deletions(-) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index a87a584..4177d4e 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -3377,6 +3377,13 @@ function rewritePaths(cmd) { } // dist/src/hooks/pre-tool-use.js +function touchesVirtualMemoryPath(value) { + const rewritten = rewritePaths(value).trim(); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/"); +} +function touchesAnyMemoryPath(value) { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} function isAnyPsqlCommand(cmd) { return /^\s*psql\b/.test(cmd.trim()); } @@ -3393,9 +3400,15 @@ function needsHivemindPsqlRewrite(cmd) { return !!query && queryReferencesInterceptedTables(query) && !queryUsesOnlyInterceptedTables(query); } function buildPsqlOnlyGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Hivemind recall is 
SQL-only in this mode. Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; } var log4 = (msg) => log("pre", msg); @@ -3416,9 +3429,9 @@ function getShellCommand(toolName, toolInput) { switch (toolName) { case "Grep": { const p = toolInput.path; - if (isPsqlMode() && p && touchesMemory(p)) + if (isPsqlMode() && p && touchesAnyMemoryPath(p)) return null; - if (p && touchesMemory(p)) { + if (p && touchesAnyMemoryPath(p)) { const pattern = toolInput.pattern ?? ""; const flags = ["-r"]; if (toolInput["-i"]) @@ -3431,9 +3444,9 @@ function getShellCommand(toolName, toolInput) { } case "Read": { const fp = getReadTargetPath(toolInput); - if (isPsqlMode() && fp && touchesMemory(fp)) + if (isPsqlMode() && fp && touchesAnyMemoryPath(fp)) return null; - if (fp && touchesMemory(fp)) { + if (fp && touchesAnyMemoryPath(fp)) { const rewritten = rewritePaths(fp) || "/"; return `${isLikelyDirectoryPath(rewritten) ? 
"ls" : "cat"} ${rewritten}`; } @@ -3445,9 +3458,9 @@ function getShellCommand(toolName, toolInput) { break; if (isHivemindPsqlCommand(cmd)) return cmd.trim(); - if (isPsqlMode() && (touchesMemory(cmd) || needsHivemindPsqlRewrite(cmd))) + if (isPsqlMode() && (touchesAnyMemoryPath(cmd) || needsHivemindPsqlRewrite(cmd))) return null; - if (!touchesMemory(cmd)) + if (!touchesAnyMemoryPath(cmd)) break; const rewritten = rewritePaths(cmd); if (!isSafe(rewritten)) { @@ -3458,9 +3471,9 @@ function getShellCommand(toolName, toolInput) { } case "Glob": { const p = toolInput.path; - if (isPsqlMode() && p && touchesMemory(p)) + if (isPsqlMode() && p && touchesAnyMemoryPath(p)) return null; - if (p && touchesMemory(p)) + if (p && touchesAnyMemoryPath(p)) return "ls /"; break; } @@ -3499,7 +3512,7 @@ async function processPreToolUse(input, deps = {}) { const shellCmd = getShellCommand(input.tool_name, input.tool_input); const toolPath = getReadTargetPath(input.tool_input) ?? input.tool_input.path ?? ""; const psqlRewriteNeeded = needsHivemindPsqlRewrite(cmd); - if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath) || psqlRewriteNeeded)) { + if (!shellCmd && (touchesAnyMemoryPath(cmd) || touchesAnyMemoryPath(toolPath) || psqlRewriteNeeded)) { const guidance = isPsqlMode() ? psqlRewriteNeeded ? buildPsqlSchemaGuidance() : buildPsqlOnlyGuidance() : `[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'.`; logFn(`unsupported command, returning guidance: ${cmd}`); return buildAllowDecision(`echo ${JSON.stringify(guidance)}`, isPsqlMode() ? 
"[DeepLake SQL] unsupported command \u2014 rewrite using psql over memory/sessions" : "[DeepLake] unsupported command \u2014 rewrite using bash builtins"); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index bbe0e4b..db32e1c 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -355,6 +355,35 @@ describe("claude pre-tool source", () => { } }); + it("blocks virtual memory filesystem paths in facts-and-sessions-only sql mode", async () => { + const prevPsql = process.env.HIVEMIND_PSQL_MODE; + const prevFactsSessions = process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + process.env.HIVEMIND_PSQL_MODE = "1"; + process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = "1"; + try { + expect(getShellCommand("Read", { file_path: "/index.md" })).toBeNull(); + expect(getShellCommand("Grep", { path: "/summaries/locomo", pattern: "Caroline" })).toBeNull(); + expect(getShellCommand("Bash", { command: "cat /sessions/conv_0_session_1.json" })).toBeNull(); + + const guidance = await processPreToolUse({ + session_id: "s1", + tool_name: "Read", + tool_input: { file_path: "/index.md" }, + tool_use_id: "tu-facts-sessions-only-read", + }, { + config: baseConfig, + }); + expect(guidance?.command).toContain("RETRY REQUIRED"); + expect(guidance?.command).toContain("sessions, memory_facts, memory_entities, and fact_entity_links"); + expect(guidance?.description).toContain("unsupported command"); + } finally { + if (prevPsql === undefined) delete process.env.HIVEMIND_PSQL_MODE; + else process.env.HIVEMIND_PSQL_MODE = prevPsql; + if (prevFactsSessions === undefined) delete process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY; + else process.env.HIVEMIND_PSQL_FACTS_SESSIONS_ONLY = prevFactsSessions; + } + }); + it("returns guidance for unsupported memory commands and passthrough for non-memory commands", async () => { const guidance = await processPreToolUse({ session_id: "s1", diff --git a/codex/bundle/pre-tool-use.js 
b/codex/bundle/pre-tool-use.js index 7bc0074..adc4c75 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -3378,6 +3378,13 @@ function rewritePaths(cmd) { } // dist/src/hooks/codex/pre-tool-use.js +function touchesVirtualMemoryPath(value) { + const rewritten = rewritePaths(value).trim(); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/"); +} +function touchesAnyMemoryPath(value) { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} function isAnyPsqlCommand(cmd) { return /^\s*psql\b/.test(cmd.trim()); } @@ -3400,9 +3407,15 @@ function buildUnsupportedGuidance() { return `This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available, plus benchmark SQL mode via psql -At -F '|' -c "SELECT ...". Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.`; } function buildPsqlOnlyGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "Hivemind recall is SQL-only in this mode. Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } return "Hivemind recall is SQL-only in this mode. Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance() { + if (isFactsSessionsOnlyPsqlMode()) { + return "Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. 
Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. Rewrite the query to reference only those tables with normal psql SELECT syntax."; } function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) { @@ -3422,7 +3435,7 @@ async function processCodexPreToolUse(input, deps = {}) { const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps; const cmd = input.tool_input?.command ?? 
""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd) && !isAnyPsqlCommand(cmd)) + if (!touchesAnyMemoryPath(cmd) && !isAnyPsqlCommand(cmd)) return { action: "pass" }; if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { if (needsHivemindPsqlRewrite(cmd)) { @@ -3434,7 +3447,7 @@ async function processCodexPreToolUse(input, deps = {}) { } return { action: "pass" }; } - if (isPsqlMode() && touchesMemory(cmd)) { + if (isPsqlMode() && touchesAnyMemoryPath(cmd)) { return { action: "guide", output: buildPsqlOnlyGuidance(), diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index b2b57f0..c70f6ee 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -42,10 +42,25 @@ import { import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; -import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; +function touchesVirtualMemoryPath(value: string): boolean { + const rewritten = rewritePaths(value).trim(); + return ( + rewritten === "/index.md" || + rewritten === "/summaries" || + rewritten.startsWith("/summaries/") || + rewritten === "/sessions" || + rewritten.startsWith("/sessions/") + ); +} + +function touchesAnyMemoryPath(value: string): boolean { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} + function isAnyPsqlCommand(cmd: string): boolean { return /^\s*psql\b/.test(cmd.trim()); } @@ -94,12 +109,21 @@ export function buildUnsupportedGuidance(): string { } export function buildPsqlOnlyGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "Hivemind recall is SQL-only in this mode. 
" + + "Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. " + + "Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } return "Hivemind recall is SQL-only in this mode. " + "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. " + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } export function buildPsqlSchemaGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } return "Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; } @@ -163,7 +187,7 @@ export async function processCodexPreToolUse( const cmd = input.tool_input?.command ?? 
""; logFn(`hook fired: cmd=${cmd}`); - if (!touchesMemory(cmd) && !isAnyPsqlCommand(cmd)) return { action: "pass" }; + if (!touchesAnyMemoryPath(cmd) && !isAnyPsqlCommand(cmd)) return { action: "pass" }; if (isAnyPsqlCommand(cmd) && !isHivemindPsqlCommand(cmd)) { if (needsHivemindPsqlRewrite(cmd)) { @@ -176,7 +200,7 @@ export async function processCodexPreToolUse( return { action: "pass" }; } - if (isPsqlMode() && touchesMemory(cmd)) { + if (isPsqlMode() && touchesAnyMemoryPath(cmd)) { return { action: "guide", output: buildPsqlOnlyGuidance(), diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 8288c3e..c7aab27 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -28,10 +28,25 @@ import { writeCachedIndexContent, } from "./query-cache.js"; import { isSafe, touchesMemory, rewritePaths } from "./memory-path-utils.js"; -import { isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; +import { isFactsSessionsOnlyPsqlMode, isIndexDisabled, isPsqlMode, isSessionsOnlyMode } from "../utils/retrieval-mode.js"; export { isSafe, touchesMemory, rewritePaths }; +function touchesVirtualMemoryPath(value: string): boolean { + const rewritten = rewritePaths(value).trim(); + return ( + rewritten === "/index.md" || + rewritten === "/summaries" || + rewritten.startsWith("/summaries/") || + rewritten === "/sessions" || + rewritten.startsWith("/sessions/") + ); +} + +function touchesAnyMemoryPath(value: string): boolean { + return touchesMemory(value) || touchesVirtualMemoryPath(value); +} + function isAnyPsqlCommand(cmd: string): boolean { return /^\s*psql\b/.test(cmd.trim()); } @@ -49,12 +64,21 @@ function needsHivemindPsqlRewrite(cmd: string): boolean { } function buildPsqlOnlyGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + + "Use psql with the sessions, memory_facts, memory_entities, and fact_entity_links tables only. 
" + + "Do NOT use grep, cat, ls, Read, Glob, memory, graph, or filesystem paths for memory lookups."; + } return "[RETRY REQUIRED] Hivemind recall is SQL-only in this mode. " + "Use psql with the memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links tables only. " + "Do NOT use grep, cat, ls, Read, Glob, or filesystem paths for memory lookups."; } function buildPsqlSchemaGuidance(): string { + if (isFactsSessionsOnlyPsqlMode()) { + return "[RETRY REQUIRED] Only psql SELECT queries over sessions, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; + } return "[RETRY REQUIRED] Only psql SELECT queries over memory, sessions, graph_nodes, graph_edges, memory_facts, memory_entities, and fact_entity_links are intercepted in SQL mode. " + "Rewrite the query to reference only those tables with normal psql SELECT syntax."; } @@ -94,8 +118,8 @@ export function getShellCommand(toolName: string, toolInput: Record Date: Tue, 21 Apr 2026 11:52:01 -0700 Subject: [PATCH 7/7] Improve regex parity and hybrid retrieval plumbing --- claude-code/bundle/pre-tool-use.js | 244 +++++++++++++++--- claude-code/bundle/session-start.js | 9 +- claude-code/bundle/shell/deeplake-shell.js | 176 +++++++++++-- .../tests/bash-command-compiler.test.ts | 18 ++ .../tests/benchmark-replay-parity.test.ts | 94 +++++++ claude-code/tests/grep-core.test.ts | 74 +++--- claude-code/tests/hooks-source.test.ts | 11 + codex/bundle/pre-tool-use.js | 244 +++++++++++++++--- codex/bundle/session-start.js | 5 +- codex/bundle/shell/deeplake-shell.js | 176 +++++++++++-- src/hooks/bash-command-compiler.ts | 91 ++++++- src/hooks/codex/pre-tool-use.ts | 3 +- src/hooks/codex/session-start.ts | 5 +- src/hooks/pre-tool-use.ts | 3 +- src/hooks/session-start.ts | 9 +- src/shell/grep-core.ts | 225 ++++++++++------ src/utils/hybrid-fusion.ts | 127 +++++++++ 17 files 
changed, 1255 insertions(+), 259 deletions(-) create mode 100644 src/utils/hybrid-fusion.ts diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 4177d4e..295234a 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -814,6 +814,103 @@ var HarrierEmbedder = class { } }; +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i = 0; i < dedupedTextRows.length; i++) { + const row = dedupedTextRows[i]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i] ?? 0) + }); + } + for (let i = 0; i < dedupedVectorRows.length; i++) { + const row = dedupedVectorRows[i]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i] ?? 
0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a, b) => b.fusedScore - a.fusedScore || b.vectorScore - a.vectorScore || b.textScore - a.textScore || a.sourceOrder - b.sourceOrder || a.creationDate.localeCompare(b.creationDate) || a.path.localeCompare(b.path)).slice(0, Math.max(0, limit)); +} + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; @@ -1111,16 +1208,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; const fallbackSessFilter = likeSessFilter; - const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); const retrievalMode = getGrepRetrievalMode(); const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); - const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; - if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { }); } @@ -1137,21 +1233,35 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const queryVectorSql = sqlFloat4Array(queryEmbedding); const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); - const buildSemanticCombinedQuery = () => { - const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); - const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); - return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; - }; - const rows2 = await api.query(buildSemanticCombinedQuery()); - return rows2.map((row) => ({ - path: String(row["path"]), - content: String(row["content"] ?? "") + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content })); } const primaryMemFilter = useSummaryBm25 ? 
"" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -1278,16 +1388,37 @@ function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, 
contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; } -function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; } -function toSqlRegexPattern(pattern, ignoreCase) { +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { if (!pattern) return null; - if (ignoreCase) - return null; try { new RegExp(pattern); return translateRegexPatternToSql(pattern); @@ -1295,9 +1426,6 @@ function toSqlRegexPattern(pattern, ignoreCase) { return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } } -function isSqlRegexPushdownSafe(pattern) { - return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); -} function unwrapWholeRegexGroup(pattern) { if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; @@ -1400,12 +1528,10 @@ function buildContentPredicate(column, likeOp, patterns) { function buildRegexPredicate(column, pattern, ignoreCase) { if (!pattern) return ""; - if (!isSqlRegexPushdownSafe(pattern)) - return ""; const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); if (!sqlPattern) return ""; - return `${column} ~ '${sqlStr(sqlPattern)}'`; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { const normalizedPattern = params.fixedString ? 
params.pattern : normalizeGrepRegexPattern(params.pattern); @@ -2098,6 +2224,13 @@ function sqlFloat4Array2(values) { return Math.fround(value).toString(); }).join(", ")}]::float4[]`; } +function quoteShellToken(token) { + if (token === "") + return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) + return token; + return `'${token.replace(/'/g, `'"'"'`)}'`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -2304,7 +2437,7 @@ function parseFindSpec(tokens) { return null; return { patterns, - execGrepCmd: execTokens.slice(0, -1).join(" ") + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" ") }; } return null; @@ -2681,24 +2814,50 @@ async function fetchSummaryCandidates(api, memoryTable, terms) { return []; const retrievalMode = getGrepRetrievalMode(); const phrase = filteredTerms.join(" "); - const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); - let sql; + let rows = []; if (retrievalMode === "embedding" || retrievalMode === "hybrid") { const embedder = getSummaryRetrievalEmbedder(); const [queryEmbedding] = await embedder.embedQueries([phrase]); if (!queryEmbedding) return []; const queryVectorSql = sqlFloat4Array2(queryEmbedding); - sql = retrievalMode === "hybrid" ? 
`SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + const vectorSql = `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); + } else { + const textSql = `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score FROM "${memoryTable}" ORDER BY score DESC LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)) + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8 + }).map((row) => ({ + path: row.path, + summary: row.content + })); + } } else { const phraseSql = sqlStr(phrase); - sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score ASC LIMIT 8`; + const 
clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); } - const rows = await api.query(sql); const candidateMap = /* @__PURE__ */ new Map(); for (const row of rows) { - const path = typeof row["path"] === "string" ? row["path"] : ""; - const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const path = row.path; + const summary = row.summary; const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? `/sessions/${extractSessionIdFromPath(path)}.json` : ""); const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); addHybridCandidate(candidateMap, { @@ -2710,6 +2869,23 @@ async function fetchSummaryCandidates(api, memoryTable, terms) { } return [...candidateMap.values()]; } +function buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase) { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END` + ]; + return `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; +} +function mapSummaryRows(rows) { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? 
row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} function prependCtes(sql, ctes) { if (ctes.length === 0) return sql; @@ -3379,7 +3555,7 @@ function rewritePaths(cmd) { // dist/src/hooks/pre-tool-use.js function touchesVirtualMemoryPath(value) { const rewritten = rewritePaths(value).trim(); - return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/"); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/") || /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten); } function touchesAnyMemoryPath(value) { return touchesMemory(value) || touchesVirtualMemoryPath(value); diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index a741c46..484d175 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -355,12 +355,13 @@ SQL strategy: 1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. 2. Expand connected facts through fact_entity_links and memory_facts. 3. Use memory_facts to identify the small set of likely source sessions. -4. Ground every exact answer on sessions rows from those source sessions. +4. Ground every final answer on sessions rows from those source sessions. 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -8. 
For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. -9. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +10. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. Good query patterns: - Canonical entity lookup: @@ -376,6 +377,7 @@ Good query patterns: Avoid these mistakes: - Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. - Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. @@ -384,6 +386,7 @@ Avoid these mistakes: Answer rules: - Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. - Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. 
diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 2e88d4f..6546720 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -67502,6 +67502,103 @@ var HarrierEmbedder = class { } }; +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i11 = 0; i11 < dedupedTextRows.length; i11++) { + const row = dedupedTextRows[i11]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i11] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i11] ?? 0) + }); + } + for (let i11 = 0; i11 < dedupedVectorRows.length; i11++) { + const row = dedupedVectorRows[i11]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i11] ?? 
0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a15, b26) => b26.fusedScore - a15.fusedScore || b26.vectorScore - a15.vectorScore || b26.textScore - a15.textScore || a15.sourceOrder - b26.sourceOrder || a15.creationDate.localeCompare(b26.creationDate) || a15.path.localeCompare(b26.path)).slice(0, Math.max(0, limit)); +} + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; @@ -67791,16 +67888,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; const fallbackSessFilter = likeSessFilter; - const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); const retrievalMode = getGrepRetrievalMode(); const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); - const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; - if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { }); } @@ -67817,21 +67913,35 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const queryVectorSql = sqlFloat4Array(queryEmbedding); const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); - const buildSemanticCombinedQuery = () => { - const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); - const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); - return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; - }; - const rows2 = await api.query(buildSemanticCombinedQuery()); - return rows2.map((row) => ({ - path: String(row["path"]), - content: String(row["content"] ?? "") + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content })); } const primaryMemFilter = useSummaryBm25 ? 
"" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67968,16 +68078,37 @@ function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, 
contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; } -function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; } -function toSqlRegexPattern(pattern, ignoreCase) { +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { if (!pattern) return null; - if (ignoreCase) - return null; try { new RegExp(pattern); return translateRegexPatternToSql(pattern); @@ -67985,9 +68116,6 @@ function toSqlRegexPattern(pattern, ignoreCase) { return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } } -function isSqlRegexPushdownSafe(pattern) { - return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); -} function unwrapWholeRegexGroup(pattern) { if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; @@ -68090,12 +68218,10 @@ function buildContentPredicate(column, likeOp, patterns) { function buildRegexPredicate(column, pattern, ignoreCase) { if (!pattern) return ""; - if (!isSqlRegexPushdownSafe(pattern)) - return ""; const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); if (!sqlPattern) return ""; - return `${column} ~ '${sqlStr(sqlPattern)}'`; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { const normalizedPattern = params.fixedString ? 
params.pattern : normalizeGrepRegexPattern(params.pattern); diff --git a/claude-code/tests/bash-command-compiler.test.ts b/claude-code/tests/bash-command-compiler.test.ts index b656bc3..b7c7114 100644 --- a/claude-code/tests/bash-command-compiler.test.ts +++ b/claude-code/tests/bash-command-compiler.test.ts @@ -273,6 +273,24 @@ describe("bash-command-compiler parsing", () => { }, lineLimit: 10, }); + expect(parseCompiledSegment("find /sessions -name '*.json' -exec grep -Eli 'support group|lgbtq support' {} \\; | head -10")).toEqual({ + kind: "find_grep", + dir: "/sessions", + patterns: ["*.json"], + params: { + pattern: "support group|lgbtq support", + targetPath: "{}", + recursive: false, + ignoreCase: true, + wordMatch: false, + filesOnly: true, + countOnly: false, + lineNumber: false, + invertMatch: false, + fixedString: false, + }, + lineLimit: 10, + }); expect(parseCompiledSegment("grep -i 'age\\|birthday\\|born.*19\\|born.*20' /sessions/*.json 2>/dev/null | head -3")).toEqual({ kind: "grep", params: { diff --git a/claude-code/tests/benchmark-replay-parity.test.ts b/claude-code/tests/benchmark-replay-parity.test.ts index e1e0c66..d88b350 100644 --- a/claude-code/tests/benchmark-replay-parity.test.ts +++ b/claude-code/tests/benchmark-replay-parity.test.ts @@ -198,6 +198,48 @@ describe("benchmark replay parity", () => { expect(virtual).toContain('"text": "We keep classic kids\' books like Dr. Seuss on the bookshelf."'); }); + it("matches file-list output for find -exec grep with regex alternation", async () => { + const files = [ + buildSessionFile(6, [ + { dia_id: "D6:1", speaker: "Melanie", text: "We keep classic kids' books like Dr. Seuss on the bookshelf." }, + { dia_id: "D6:2", speaker: "Caroline", text: "That sounds perfect for the kids." }, + ]), + buildSessionFile(7, [ + { dia_id: "D7:1", speaker: "Caroline", text: "I just started a new counseling course." 
}, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "find /sessions -name '*.json' -exec grep -El 'Dr. Seuss|bookshelf' {} \\; | head -10"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toBe("/sessions/conv_0_session_6.json"); + }); + + it("matches file-list output for case-insensitive find -exec grep regex", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:1", speaker: "Caroline", text: "I joined the LGBTQ support group last Tuesday, July 18, 2023." }, + { dia_id: "D10:2", speaker: "Melanie", text: "That sounds like such a good step." }, + ]), + buildSessionFile(11, [ + { dia_id: "D11:1", speaker: "Caroline", text: "I moved here from Sweden four years ago." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "find /sessions -name '*.json' -exec grep -Eli 'support group|lgbtq support' {} \\; | head -10"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toBe("/sessions/conv_0_session_10.json"); + }); + it("keeps the 18th-birthday shell-loop case explicitly divergent by returning retry guidance", async () => { const files = [ buildSessionFile(12, [ @@ -349,4 +391,56 @@ describe("benchmark replay parity", () => { expect(local.replaceAll(root, "")).toEqual(virtual); expect(local).toContain("Charlotte's Web"); }); + + it("matches raw output for grep --regexp= alternation over summaries", async () => { + const files: FixtureFile[] = [ + { + path: "/summaries/locomo/conv_0_session_6_summary.md", + content: [ + "# Session 6", + "## Searchable Facts", + "- Melanie said Charlotte's Web was her favorite book as a child.", + "- The family keeps classic kids' books on the bookshelf.", + "", + ].join("\n"), + }, + { + 
path: "/summaries/locomo/conv_0_session_7_summary.md", + content: [ + "# Session 7", + "## Searchable Facts", + "- Caroline started a new counseling course.", + "", + ].join("\n"), + }, + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -i --regexp='book\\|read' /summaries/locomo/conv_0_session_*.md"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain("Charlotte's Web"); + }); + + it("matches raw output for quoted regex over pretty-printed session json", async () => { + const files = [ + buildSessionFile(10, [ + { dia_id: "D10:12", speaker: "Melanie", text: "We camped near a mountain lake in a state park last summer." }, + { dia_id: "D10:13", speaker: "Caroline", text: "That sounds beautiful." }, + ]), + ]; + const root = writeFixture(files); + roots.push(root); + + const command = "grep -r -E '\"dia_id\": \"D10:12\"|\"text\": \"We camped near a mountain lake' /sessions/conv_0_session_10.json"; + const local = runLocalBash(root, command); + const virtual = await runVirtualCommand(files, command); + + expect(local.replaceAll(root, "")).toEqual(virtual); + expect(virtual).toContain('"dia_id": "D10:12"'); + expect(virtual).toContain('"text": "We camped near a mountain lake in a state park last summer."'); + }); }); diff --git a/claude-code/tests/grep-core.test.ts b/claude-code/tests/grep-core.test.ts index 8bfd2b3..626151b 100644 --- a/claude-code/tests/grep-core.test.ts +++ b/claude-code/tests/grep-core.test.ts @@ -683,7 +683,7 @@ describe("searchDeeplakeTables", () => { } }); - it("uses deeplake hybrid record scoring when retrieval mode is hybrid", async () => { + it("runs separate lexical and vector queries then fuses them when retrieval mode is hybrid", async () => { const prevMode = process.env.HIVEMIND_GREP_RETRIEVAL_MODE; const prevVector = process.env.HIVEMIND_HYBRID_VECTOR_WEIGHT; 
const prevText = process.env.HIVEMIND_HYBRID_TEXT_WEIGHT; @@ -692,8 +692,18 @@ describe("searchDeeplakeTables", () => { process.env.HIVEMIND_HYBRID_TEXT_WEIGHT = "0.4"; const embedSpy = vi.spyOn(HarrierEmbedder.prototype, "embedQueries").mockResolvedValue([[0.1, 0.2, 0.3]]); try { - const api = mockApi([]); - await searchDeeplakeTables(api, "memory", "sessions", { + const api = { + query: vi.fn() + .mockResolvedValueOnce([ + { path: "/summaries/shared.md", content: "shared", source_order: 0, creation_date: "", score: 5 }, + { path: "/sessions/vector.json", content: "vector", source_order: 1, creation_date: "2024-01-01", score: 1 }, + ]) + .mockResolvedValueOnce([ + { path: "/summaries/shared.md", content: "shared", source_order: 0, creation_date: "", score: 4 }, + { path: "/sessions/text.json", content: "text", source_order: 1, creation_date: "2024-01-02", score: 3 }, + ]), + } as any; + const rows = await searchDeeplakeTables(api, "memory", "sessions", { pathFilter: "", contentScanOnly: false, likeOp: "ILIKE", @@ -702,11 +712,19 @@ describe("searchDeeplakeTables", () => { bm25QueryText: "book novel literature", limit: 50, }); - const sql = api.query.mock.calls[0][0] as string; expect(embedSpy).toHaveBeenCalledWith(["book novel literature"]); - expect(sql).toContain("deeplake_hybrid_record"); - expect(sql).toContain("0.6, 0.4"); - expect(sql).toContain("ARRAY[0.10000000149011612"); + expect(api.query).toHaveBeenCalledTimes(2); + const [vectorSql, textSql] = api.query.mock.calls.map((call: unknown[]) => call[0] as string); + expect(vectorSql).toContain("embedding <#> ARRAY[0.10000000149011612"); + expect(textSql).toContain("summary::text <#> 'book novel literature'"); + expect(textSql).toContain("message::text <#> 'book novel literature'"); + expect(vectorSql).not.toContain("deeplake_hybrid_record"); + expect(textSql).not.toContain("deeplake_hybrid_record"); + expect(rows.map((row) => row.path)).toEqual([ + "/summaries/shared.md", + "/sessions/text.json", + 
"/sessions/vector.json", + ]); } finally { embedSpy.mockRestore(); if (prevMode === undefined) delete process.env.HIVEMIND_GREP_RETRIEVAL_MODE; @@ -763,7 +781,7 @@ describe("searchDeeplakeTables", () => { expect(sql).toContain("message::text ~ 'relationship|partner|married'"); }); - it("skips SQL regex pushdown for ignore-case regex scans", async () => { + it("uses case-insensitive regex pushdown for ignore-case regex scans", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { pathFilter: "", @@ -775,8 +793,8 @@ describe("searchDeeplakeTables", () => { }); const sql = api.query.mock.calls[0][0] as string; expect(sql).toContain("summary::text ILIKE '%relationship%'"); - expect(sql).not.toContain("summary::text ~"); - expect(sql).not.toContain("message::text ~"); + expect(sql).toContain("summary::text ~* 'relationship|partner|married'"); + expect(sql).toContain("message::text ~* 'relationship|partner|married'"); }); it("uses OR ILIKE prefilters for grep BRE alternation patterns", async () => { @@ -801,7 +819,7 @@ describe("searchDeeplakeTables", () => { expect(sql).not.toContain("ILIKE '%book|novel|literature%'"); }); - it("keeps unsupported bracketed regex patterns out of SQL pushdown", async () => { + it("pushes down escaped regex literals for invalid bracketed patterns", async () => { const api = mockApi([]); await searchDeeplakeTables(api, "m", "s", { pathFilter: " AND path = '/index.md'", @@ -812,30 +830,8 @@ describe("searchDeeplakeTables", () => { }); const sql = api.query.mock.calls[0][0] as string; expect(sql).toContain("path = '/index.md'"); - expect(sql).not.toContain("summary::text ~"); - expect(sql).not.toContain("message::text ~"); - }); - - it("falls back to OR LIKE prefilters when regex SQL is rejected", async () => { - const api = { - query: vi.fn() - .mockRejectedValueOnce(new Error("regex operator not supported")) - .mockResolvedValueOnce([]), - } as any; - await searchDeeplakeTables(api, "m", "s", { - pathFilter: "", 
- contentScanOnly: true, - likeOp: "LIKE", - escapedPattern: "relationship|partner|married", - regexPattern: "relationship|partner|married", - prefilterPatterns: ["relationship", "partner", "married"], - }); - expect(api.query).toHaveBeenCalledTimes(2); - const fallbackSql = api.query.mock.calls[1][0] as string; - expect(fallbackSql).toContain("summary::text LIKE '%relationship%'"); - expect(fallbackSql).toContain("summary::text LIKE '%partner%'"); - expect(fallbackSql).toContain("summary::text LIKE '%married%'"); - expect(fallbackSql).not.toContain("relationship|partner|married"); + expect(sql).toContain("summary::text ~ '\\\\^- \\\\[conv_0_session_\\\\.\\\\*\\\\\\\\\\\\]'"); + expect(sql).toContain("message::text ~ '\\\\^- \\\\[conv_0_session_\\\\.\\\\*\\\\\\\\\\\\]'"); }); it("falls back to summary ILIKE when BM25 query is rejected", async () => { @@ -1144,7 +1140,7 @@ describe("regex literal prefilter", () => { it("builds SQL-safe regex patterns conservatively", () => { expect(toSqlRegexPattern("foo.*bar", false)).toBe("foo.*bar"); - expect(toSqlRegexPattern("foo.*bar", true)).toBeNull(); + expect(toSqlRegexPattern("foo.*bar", true)).toBe("foo.*bar"); expect(toSqlRegexPattern("^- [conv_0_session_.*\\]", false)).toBe("\\^- \\[conv_0_session_\\.\\*\\\\\\]"); expect(toSqlRegexPattern("\\bitem\\d+", false)).toBe("\\yitem[[:digit:]]+"); expect(toSqlRegexPattern("foo(?=bar)", false)).toBeNull(); @@ -1292,7 +1288,7 @@ describe("regex literal prefilter", () => { expect(sql).toContain("message::text ILIKE '%ten years ago%'"); }); - it("avoids SQL regex pushdown for bracket-anchored patterns and matches the raw-json local output", async () => { + it("uses SQL regex pushdown for bracket-anchored patterns and matches the raw-json local output", async () => { const rows = [ { path: "/sessions/conv_0_session_2.json", @@ -1320,8 +1316,8 @@ describe("regex literal prefilter", () => { expect(remote).toEqual(local); expect(remote).toEqual([]); const sql = 
api.query.mock.calls[0][0] as string; - expect(sql).not.toContain("summary::text ~"); - expect(sql).not.toContain("message::text ~"); + expect(sql).toContain("ORDER BY (summary <#> 'D2:1') DESC"); + expect(sql).toContain("message::text ~ '^\\\\[D2:1\\\\]'"); }); }); diff --git a/claude-code/tests/hooks-source.test.ts b/claude-code/tests/hooks-source.test.ts index db32e1c..700ee34 100644 --- a/claude-code/tests/hooks-source.test.ts +++ b/claude-code/tests/hooks-source.test.ts @@ -376,6 +376,17 @@ describe("claude pre-tool source", () => { expect(guidance?.command).toContain("RETRY REQUIRED"); expect(guidance?.command).toContain("sessions, memory_facts, memory_entities, and fact_entity_links"); expect(guidance?.description).toContain("unsupported command"); + + const bashGuidance = await processPreToolUse({ + session_id: "s1", + tool_name: "Bash", + tool_input: { command: "cat /sessions/conv_0_session_1.json" }, + tool_use_id: "tu-facts-sessions-only-bash", + }, { + config: baseConfig, + }); + expect(bashGuidance?.command).toContain("RETRY REQUIRED"); + expect(bashGuidance?.description).toContain("unsupported command"); } finally { if (prevPsql === undefined) delete process.env.HIVEMIND_PSQL_MODE; else process.env.HIVEMIND_PSQL_MODE = prevPsql; diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index adc4c75..76f3919 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -801,6 +801,103 @@ var HarrierEmbedder = class { } }; +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? 
value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i = 0; i < dedupedTextRows.length; i++) { + const row = dedupedTextRows[i]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i] ?? 0) + }); + } + for (let i = 0; i < dedupedVectorRows.length; i++) { + const row = dedupedVectorRows[i]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i] ?? 
0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a, b) => b.fusedScore - a.fusedScore || b.vectorScore - a.vectorScore || b.textScore - a.textScore || a.sourceOrder - b.sourceOrder || a.creationDate.localeCompare(b.creationDate) || a.path.localeCompare(b.path)).slice(0, Math.max(0, limit)); +} + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; @@ -1098,16 +1195,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; const fallbackSessFilter = likeSessFilter; - const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); const retrievalMode = getGrepRetrievalMode(); const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); - const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; - if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { }); } @@ -1124,21 +1220,35 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const queryVectorSql = sqlFloat4Array(queryEmbedding); const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); - const buildSemanticCombinedQuery = () => { - const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); - const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); - return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; - }; - const rows2 = await api.query(buildSemanticCombinedQuery()); - return rows2.map((row) => ({ - path: String(row["path"]), - content: String(row["content"] ?? "") + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content })); } const primaryMemFilter = useSummaryBm25 ? 
"" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -1265,16 +1375,37 @@ function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, 
contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; } -function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; } -function toSqlRegexPattern(pattern, ignoreCase) { +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { if (!pattern) return null; - if (ignoreCase) - return null; try { new RegExp(pattern); return translateRegexPatternToSql(pattern); @@ -1282,9 +1413,6 @@ function toSqlRegexPattern(pattern, ignoreCase) { return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } } -function isSqlRegexPushdownSafe(pattern) { - return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); -} function unwrapWholeRegexGroup(pattern) { if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; @@ -1387,12 +1515,10 @@ function buildContentPredicate(column, likeOp, patterns) { function buildRegexPredicate(column, pattern, ignoreCase) { if (!pattern) return ""; - if (!isSqlRegexPushdownSafe(pattern)) - return ""; const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); if (!sqlPattern) return ""; - return `${column} ~ '${sqlStr(sqlPattern)}'`; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { const normalizedPattern = params.fixedString ? 
params.pattern : normalizeGrepRegexPattern(params.pattern); @@ -2085,6 +2211,13 @@ function sqlFloat4Array2(values) { return Math.fround(value).toString(); }).join(", ")}]::float4[]`; } +function quoteShellToken(token) { + if (token === "") + return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) + return token; + return `'${token.replace(/'/g, `'"'"'`)}'`; +} function isQuoted(ch) { return ch === "'" || ch === '"'; } @@ -2291,7 +2424,7 @@ function parseFindSpec(tokens) { return null; return { patterns, - execGrepCmd: execTokens.slice(0, -1).join(" ") + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" ") }; } return null; @@ -2668,24 +2801,50 @@ async function fetchSummaryCandidates(api, memoryTable, terms) { return []; const retrievalMode = getGrepRetrievalMode(); const phrase = filteredTerms.join(" "); - const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); - let sql; + let rows = []; if (retrievalMode === "embedding" || retrievalMode === "hybrid") { const embedder = getSummaryRetrievalEmbedder(); const [queryEmbedding] = await embedder.embedQueries([phrase]); if (!queryEmbedding) return []; const queryVectorSql = sqlFloat4Array2(queryEmbedding); - sql = retrievalMode === "hybrid" ? 
`SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + const vectorSql = `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); + } else { + const textSql = `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score FROM "${memoryTable}" ORDER BY score DESC LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)) + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber2(DEFAULT_HYBRID_TEXT_WEIGHT2, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber2(DEFAULT_HYBRID_VECTOR_WEIGHT2, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8 + }).map((row) => ({ + path: row.path, + summary: row.content + })); + } } else { const phraseSql = sqlStr(phrase); - sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score ASC LIMIT 8`; + const 
clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "" + })); } - const rows = await api.query(sql); const candidateMap = /* @__PURE__ */ new Map(); for (const row of rows) { - const path = typeof row["path"] === "string" ? row["path"] : ""; - const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const path = row.path; + const summary = row.summary; const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? `/sessions/${extractSessionIdFromPath(path)}.json` : ""); const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); addHybridCandidate(candidateMap, { @@ -2697,6 +2856,23 @@ async function fetchSummaryCandidates(api, memoryTable, terms) { } return [...candidateMap.values()]; } +function buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase) { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END` + ]; + return `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score FROM "${memoryTable}" WHERE ${clauses.join(" OR ")} ORDER BY score DESC LIMIT 8`; +} +function mapSummaryRows(rows) { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? 
row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} function prependCtes(sql, ctes) { if (ctes.length === 0) return sql; @@ -3380,7 +3556,7 @@ function rewritePaths(cmd) { // dist/src/hooks/codex/pre-tool-use.js function touchesVirtualMemoryPath(value) { const rewritten = rewritePaths(value).trim(); - return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/"); + return rewritten === "/index.md" || rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || rewritten.startsWith("/sessions/") || /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten); } function touchesAnyMemoryPath(value) { return touchesMemory(value) || touchesVirtualMemoryPath(value); diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index 4945d2b..1e29e19 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -261,7 +261,8 @@ Workflow: 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -8. Facts are for narrowing and aggregation; sessions are for the final exact answer. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. Facts are for narrowing and aggregation; sessions are for the final exact answer. Good query patterns: - Canonical entity lookup: @@ -277,6 +278,7 @@ Good query patterns: Avoid these mistakes: - Do NOT query memory, graph_nodes, or graph_edges in this mode. 
+- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. - Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT replace an exact status or self-label with a broader biography. @@ -284,6 +286,7 @@ Avoid these mistakes: Answer rules: - Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. - Do not answer "not found" until you have checked both the fact layer and a likely sessions row. diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 2e88d4f..6546720 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -67502,6 +67502,103 @@ var HarrierEmbedder = class { } }; +// dist/src/utils/hybrid-fusion.js +function coerceFinite(value) { + return Number.isFinite(value) ? 
value : 0; +} +function normalizeWeights(vectorWeight, textWeight) { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) + return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total + }; +} +function softmaxNormalizeScores(scores) { + if (scores.length === 0) + return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} +function pickPreferredRow(existing, candidate) { + if (!existing) + return candidate; + if (candidate.score > existing.score) + return candidate; + if (candidate.score < existing.score) + return existing; + if (candidate.sourceOrder < existing.sourceOrder) + return candidate; + if (candidate.sourceOrder > existing.sourceOrder) + return existing; + if (candidate.creationDate < existing.creationDate) + return candidate; + if (candidate.creationDate > existing.creationDate) + return existing; + return candidate.path < existing.path ? 
candidate : existing; +} +function dedupeBestRows(rows) { + const bestByPath = /* @__PURE__ */ new Map(); + for (const row of rows) { + if (!row.path) + continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} +function fuseRetrievalRows(args) { + const { textRows, vectorRows, limit } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = /* @__PURE__ */ new Map(); + for (let i11 = 0; i11 < dedupedTextRows.length; i11++) { + const row = dedupedTextRows[i11]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i11] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i11] ?? 0) + }); + } + for (let i11 = 0; i11 < dedupedVectorRows.length; i11++) { + const row = dedupedVectorRows[i11]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i11] ?? 
0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) + existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) + existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = textWeight * existing.textScore + vectorWeight * existing.vectorScore; + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore + }); + } + return [...fusedByPath.values()].sort((a15, b26) => b26.fusedScore - a15.fusedScore || b26.vectorScore - a15.vectorScore || b26.textScore - a15.textScore || a15.sourceOrder - b26.sourceOrder || a15.creationDate.localeCompare(b26.creationDate) || a15.path.localeCompare(b26.path)).slice(0, Math.max(0, limit)); +} + // dist/src/utils/retrieval-mode.js function isSessionsOnlyMode() { const raw = process.env["HIVEMIND_SESSIONS_ONLY"] ?? process.env["DEEPLAKE_SESSIONS_ONLY"] ?? ""; @@ -67791,16 +67888,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const regexSessFilter = regexPattern ? buildRegexFilter("message::text", regexPattern, ignoreCase) : ""; const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; const fallbackSessFilter = likeSessFilter; - const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); const retrievalMode = getGrepRetrievalMode(); const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); - const shouldUseFallbackCapablePrimary = useSummaryBm25 || hasSqlRegexFilter; const ensureSummaryBm25Index = api.ensureSummaryBm25Index; - if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + if ((useSummaryBm25 || useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0) && typeof ensureSummaryBm25Index === "function") { await ensureSummaryBm25Index.call(api, memoryTable).catch(() => { }); } @@ -67817,21 +67913,35 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) { const queryVectorSql = sqlFloat4Array(queryEmbedding); const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); - const buildSemanticCombinedQuery = () => { - const memQuery = useHybridRetrieval ? buildHybridSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit); - const sessQuery = useHybridRetrieval ? buildHybridSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, semanticQueryText, vectorWeight, textWeight, limit) : buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit); - return sessionsOnly ? 
`SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; - }; - const rows2 = await api.query(buildSemanticCombinedQuery()); - return rows2.map((row) => ({ - path: String(row["path"]), - content: String(row["content"] ?? "") + const vectorQuery = buildScoredCombinedQuery(sessionsOnly, buildEmbeddingSimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", queryVectorSql, limit), buildEmbeddingSimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", queryVectorSql, limit), limit); + if (!useHybridRetrieval) { + const rows2 = await api.query(vectorQuery); + return rows2.map((row) => ({ + path: String(row["path"]), + content: String(row["content"] ?? "") + })); + } + const lexicalQuery = buildScoredCombinedQuery(sessionsOnly, buildBm25SimilarityQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildBm25SimilarityQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const lexicalFallbackQuery = buildScoredCombinedQuery(sessionsOnly, buildHeuristicLexicalQuery(memoryTable, pathFilter, "summary::text", 0, "''", lexicalQueryText, limit), buildHeuristicLexicalQuery(sessionsTable, pathFilter, "message::text", 1, "COALESCE(creation_date::text, '')", lexicalQueryText, limit), limit); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)) + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit + }).map((row) => ({ + path: row.path, + content: row.content })); } const primaryMemFilter = useSummaryBm25 ? 
"" : `${likeMemFilter}${regexMemFilter}`; const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = shouldUseFallbackCapablePrimary ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); + const rows = useSummaryBm25 ? await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); return rows.map((row) => ({ path: String(row["path"]), content: String(row["content"] ?? "") @@ -67968,16 +68078,37 @@ function buildSummaryBm25Query(memoryTable, pathFilter, queryText, limit) { return `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date FROM "${memoryTable}" WHERE 1=1${pathFilter} ORDER BY (summary <#> '${sqlStr(queryText)}') DESC LIMIT ${limit}`; } function buildEmbeddingSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; +} +function buildBm25SimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} +function buildHeuristicLexicalQuery(tableName, pathFilter, 
contentExpr, sourceOrder, creationDateExpr, queryText, limit) { + const terms = [...new Set(queryText.split(/\s+/).map((term) => term.trim()).filter((term) => term.length >= 2))].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END` + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; } -function buildHybridSimilarityQuery(tableName, pathFilter, contentExpr, sourceOrder, creationDateExpr, queryVectorSql, queryText, vectorWeight, textWeight, limit) { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; +function buildScoredCombinedQuery(sessionsOnly, memQuery, sessQuery, limit) { + return sessionsOnly ? 
`SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; } -function toSqlRegexPattern(pattern, ignoreCase) { +function mapScoredRows(rows) { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0 + })); +} +function toSqlRegexPattern(pattern, _ignoreCase) { if (!pattern) return null; - if (ignoreCase) - return null; try { new RegExp(pattern); return translateRegexPatternToSql(pattern); @@ -67985,9 +68116,6 @@ function toSqlRegexPattern(pattern, ignoreCase) { return pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } } -function isSqlRegexPushdownSafe(pattern) { - return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); -} function unwrapWholeRegexGroup(pattern) { if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; @@ -68090,12 +68218,10 @@ function buildContentPredicate(column, likeOp, patterns) { function buildRegexPredicate(column, pattern, ignoreCase) { if (!pattern) return ""; - if (!isSqlRegexPushdownSafe(pattern)) - return ""; const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); if (!sqlPattern) return ""; - return `${column} ~ '${sqlStr(sqlPattern)}'`; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function compileGrepRegex(params) { const normalizedPattern = params.fixedString ? 
params.pattern : normalizeGrepRegexPattern(params.pattern); diff --git a/src/hooks/bash-command-compiler.ts b/src/hooks/bash-command-compiler.ts index 79bf9fe..c29d2c5 100644 --- a/src/hooks/bash-command-compiler.ts +++ b/src/hooks/bash-command-compiler.ts @@ -1,5 +1,6 @@ import type { DeeplakeApi } from "../deeplake-api.js"; import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { type ScoredRetrievalRow, fuseRetrievalRows } from "../utils/hybrid-fusion.js"; import { sqlLike, sqlStr } from "../utils/sql.js"; import { type GrepParams, handleGrepDirect, parseBashGrep } from "./grep-direct.js"; import { normalizeContent, refineGrepMatches } from "../shell/grep-core.js"; @@ -83,6 +84,12 @@ interface ParsedFindSpec { execGrepCmd: string | null; } +function quoteShellToken(token: string): string { + if (token === "") return "''"; + if (!/[\s"'\\|&;<>()[\]{}$*?]/.test(token)) return token; + return `'${token.replace(/'/g, `'\"'\"'`)}'`; +} + function isQuoted(ch: string): boolean { return ch === "'" || ch === "\""; } @@ -288,7 +295,7 @@ function parseFindSpec(tokens: string[]): ParsedFindSpec | null { if ((terminator !== "\\;" && terminator !== ";") || target !== "{}") return null; return { patterns, - execGrepCmd: execTokens.slice(0, -1).join(" "), + execGrepCmd: execTokens.slice(0, -1).map(quoteShellToken).join(" "), }; } return null; @@ -844,31 +851,64 @@ async function fetchSummaryCandidates( if (filteredTerms.length === 0) return []; const retrievalMode = getGrepRetrievalMode(); const phrase = filteredTerms.join(" "); - const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); - let sql: string; + let rows: Array<{ path: string; summary: string }> = []; if (retrievalMode === "embedding" || retrievalMode === "hybrid") { const embedder = getSummaryRetrievalEmbedder(); const [queryEmbedding] = await embedder.embedQueries([phrase]); if (!queryEmbedding) return []; const queryVectorSql = sqlFloat4Array(queryEmbedding); - sql = 
retrievalMode === "hybrid" - ? `SELECT path, summary, ((embedding, summary)::deeplake_hybrid_record <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(phrase)}', ${envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT")}, ${envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT")})) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8` - : `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score FROM "${memoryTable}" WHERE embedding IS NOT NULL ORDER BY score DESC LIMIT 8`; + const vectorSql = + `SELECT path, summary, (embedding <#> ${queryVectorSql}) AS score` + + ` FROM "${memoryTable}"` + + ` WHERE embedding IS NOT NULL` + + ` ORDER BY score DESC` + + ` LIMIT 8`; + if (retrievalMode === "embedding") { + rows = (await api.query(vectorSql)).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? 
row["summary"] : "", + })); + } else { + const textSql = + `SELECT path, summary, summary <#> '${sqlStr(phrase)}' AS score` + + ` FROM "${memoryTable}"` + + ` ORDER BY score DESC` + + ` LIMIT 8`; + const textFallbackSql = buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorSql), + api.query(textSql).catch(() => api.query(textFallbackSql)), + ]); + rows = fuseRetrievalRows({ + textRows: mapSummaryRows(textRows), + vectorRows: mapSummaryRows(vectorRows), + textWeight: envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"), + vectorWeight: envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"), + limit: 8, + }).map((row) => ({ + path: row.path, + summary: row.content, + })); + } } else { const phraseSql = sqlStr(phrase); - sql = + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const sql = `SELECT path, summary, summary <#> '${phraseSql}' AS score` + ` FROM "${memoryTable}"` + ` WHERE ${clauses.join(" OR ")}` + - ` ORDER BY score ASC` + + ` ORDER BY score DESC` + ` LIMIT 8`; + rows = (await api.query(sql).catch(() => api.query(buildSummaryHeuristicQuery(memoryTable, filteredTerms, phrase)))).map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + summary: typeof row["summary"] === "string" ? row["summary"] : "", + })); } - const rows = await api.query(sql); const candidateMap = new Map(); for (const row of rows) { - const path = typeof row["path"] === "string" ? row["path"] : ""; - const summary = typeof row["summary"] === "string" ? row["summary"] : ""; + const path = row.path; + const summary = row.summary; const sourcePath = extractSummarySourcePath(summary) || (extractSessionIdFromPath(path) ? 
`/sessions/${extractSessionIdFromPath(path)}.json` : ""); const sessionId = extractSessionIdFromPath(path) || extractSessionIdFromPath(sourcePath); addHybridCandidate(candidateMap, { @@ -881,6 +921,35 @@ async function fetchSummaryCandidates( return [...candidateMap.values()]; } +function buildSummaryHeuristicQuery( + memoryTable: string, + filteredTerms: string[], + phrase: string, +): string { + const clauses = filteredTerms.map((term) => `summary ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...filteredTerms.map((term) => `CASE WHEN summary ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN summary ILIKE '%${sqlLike(phrase)}%' THEN ${Math.max(1, Math.min(filteredTerms.length, 4))} ELSE 0 END`, + ]; + return ( + `SELECT path, summary, (${scoreTerms.join(" + ")})::float AS score` + + ` FROM "${memoryTable}"` + + ` WHERE ${clauses.join(" OR ")}` + + ` ORDER BY score DESC` + + ` LIMIT 8` + ); +} + +function mapSummaryRows(rows: Record[]): ScoredRetrievalRow[] { + return rows.map((row) => ({ + path: typeof row["path"] === "string" ? row["path"] : "", + content: typeof row["summary"] === "string" ? row["summary"] : "", + sourceOrder: 0, + creationDate: "", + score: Number.isFinite(Number(row["score"])) ? 
Number(row["score"]) : 0, + })); +} + function prependCtes(sql: string, ctes: string[]): string { if (ctes.length === 0) return sql; if (/^with\b/i.test(sql)) { diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index c70f6ee..8c94103 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -53,7 +53,8 @@ function touchesVirtualMemoryPath(value: string): boolean { rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || - rewritten.startsWith("/sessions/") + rewritten.startsWith("/sessions/") || + /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten) ); } diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index 2a537fb..582de16 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -168,7 +168,8 @@ Workflow: 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. Use sessions.message only when you need the raw JSON payload. -8. Facts are for narrowing and aggregation; sessions are for the final exact answer. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. Facts are for narrowing and aggregation; sessions are for the final exact answer. Good query patterns: - Canonical entity lookup: @@ -184,6 +185,7 @@ Good query patterns: Avoid these mistakes: - Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. - Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. 
- Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT replace an exact status or self-label with a broader biography. @@ -191,6 +193,7 @@ Avoid these mistakes: Answer rules: - Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. - Do not answer "not found" until you have checked both the fact layer and a likely sessions row. diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index c7aab27..6e212e6 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -39,7 +39,8 @@ function touchesVirtualMemoryPath(value: string): boolean { rewritten === "/summaries" || rewritten.startsWith("/summaries/") || rewritten === "/sessions" || - rewritten.startsWith("/sessions/") + rewritten.startsWith("/sessions/") || + /(^|[\s"'`])\/(?:index\.md|summaries(?:\/|\b)|sessions(?:\/|\b))/.test(rewritten) ); } diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index aecf25f..1c27323 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -238,12 +238,13 @@ SQL strategy: 1. Start with memory_entities to resolve the named person, project, place, or organization into a canonical entity. 2. Expand connected facts through fact_entity_links and memory_facts. 3. Use memory_facts to identify the small set of likely source sessions. -4. Ground every exact answer on sessions rows from those source sessions. +4. Ground every final answer on sessions rows from those source sessions. 5. Prefer small targeted SELECTs with ORDER BY and LIMIT 5-10. 6. Do not use filesystem commands, grep, cat, ls, Read, or Glob for recall in this mode. 7. Use sessions.text, sessions.speaker, sessions.turn_index, and sessions.source_date_time for transcript retrieval. 
Use sessions.message only when you need the raw JSON payload. -8. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. -9. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. +8. Sessions are the source of truth. Facts are only a helper index and synthesis layer. +9. For identity, origin, relationship, preference, and "what did they decide" questions, prefer transcript grounding over paraphrased fact labels. +10. For list/profile questions, facts are for narrowing and aggregation; sessions are for final verification. Good query patterns: - Canonical entity lookup: @@ -259,6 +260,7 @@ Good query patterns: Avoid these mistakes: - Do NOT query memory, graph_nodes, or graph_edges in this mode. +- Do NOT answer directly from memory_facts.summary, memory_entities.summary, or aliases when a relevant transcript row is available. - Do NOT use fact tables for exact quoted wording when a transcript row is available; use them to narrow and aggregate, then ground on sessions. - Do NOT filter sessions.message directly when sessions.text / sessions.speaker already contain the needed transcript fields. - Do NOT blend multiple different events when the question asks about one specific event. Prefer the most direct supporting row. @@ -267,6 +269,7 @@ Avoid these mistakes: Answer rules: - Return the smallest exact answer supported by the data. +- Sessions win over facts if they differ in detail or specificity. - Resolve relative dates against the session's own creation_date or transcript date metadata, not today's date. - Do not answer "not found" until you have checked both the fact layer and a likely sessions row for the named person. - For duration or age-style answers, preserve the stored relative phrase when it directly answers the question instead of over-converting it. 
diff --git a/src/shell/grep-core.ts b/src/shell/grep-core.ts index 0c483b6..b7e70d8 100644 --- a/src/shell/grep-core.ts +++ b/src/shell/grep-core.ts @@ -17,6 +17,7 @@ import type { DeeplakeApi } from "../deeplake-api.js"; import { HarrierEmbedder } from "../embeddings/harrier.js"; +import { type ScoredRetrievalRow, fuseRetrievalRows } from "../utils/hybrid-fusion.js"; import { sqlStr, sqlLike } from "../utils/sql.js"; import { getGrepRetrievalMode, isSessionsOnlyMode, isSummaryBm25Disabled } from "../utils/retrieval-mode.js"; @@ -314,7 +315,7 @@ function buildPathCondition(targetPath: string): string { } /** - * Dual-table LIKE/ILIKE search. Casts `summary` (TEXT) and `message` (JSONB) + * Dual-table text/regex search. Casts `summary` (TEXT) and `message` (JSONB) * to ::text so the same predicate works across both. The lookup always goes * through a single UNION ALL query so one grep maps to one SQL search. */ @@ -340,20 +341,18 @@ export async function searchDeeplakeTables( // coarser query path. const primarySessFilter = `${likeSessFilter}${regexSessFilter}`; const fallbackSessFilter = likeSessFilter; - const hasSqlRegexFilter = Boolean(regexMemFilter || regexSessFilter); const sessionsOnly = isSessionsOnlyMode(); const retrievalMode = getGrepRetrievalMode(); const semanticQueryText = (queryText ?? bm25QueryText ?? "").trim(); + const lexicalQueryText = (bm25QueryText ?? 
semanticQueryText).trim(); const useEmbeddingRetrieval = retrievalMode === "embedding" && semanticQueryText.length > 0; const useHybridRetrieval = retrievalMode === "hybrid" && semanticQueryText.length > 0; const useSummaryBm25 = retrievalMode === "classic" && !sessionsOnly && !isSummaryBm25Disabled() && Boolean(bm25QueryText); - const shouldUseFallbackCapablePrimary = useSummaryBm25 - || hasSqlRegexFilter; const ensureSummaryBm25Index = (api as DeeplakeApi & { ensureSummaryBm25Index?: (tableName?: string) => Promise; }).ensureSummaryBm25Index; - if (useSummaryBm25 && typeof ensureSummaryBm25Index === "function") { + if ((useSummaryBm25 || (useHybridRetrieval && !sessionsOnly && lexicalQueryText.length > 0)) && typeof ensureSummaryBm25Index === "function") { await ensureSummaryBm25Index.call(api, memoryTable).catch(() => {}); } @@ -374,59 +373,94 @@ export async function searchDeeplakeTables( const queryVectorSql = sqlFloat4Array(queryEmbedding); const vectorWeight = envNumber(DEFAULT_HYBRID_VECTOR_WEIGHT, "HIVEMIND_HYBRID_VECTOR_WEIGHT", "DEEPLAKE_HYBRID_VECTOR_WEIGHT"); const textWeight = envNumber(DEFAULT_HYBRID_TEXT_WEIGHT, "HIVEMIND_HYBRID_TEXT_WEIGHT", "DEEPLAKE_HYBRID_TEXT_WEIGHT"); - const buildSemanticCombinedQuery = (): string => { - const memQuery = useHybridRetrieval - ? buildHybridSimilarityQuery( - memoryTable, - pathFilter, - "summary::text", - 0, - "''", - queryVectorSql, - semanticQueryText, - vectorWeight, - textWeight, - limit, - ) - : buildEmbeddingSimilarityQuery( - memoryTable, - pathFilter, - "summary::text", - 0, - "''", - queryVectorSql, - limit, - ); - const sessQuery = useHybridRetrieval - ? 
buildHybridSimilarityQuery( - sessionsTable, - pathFilter, - "message::text", - 1, - "COALESCE(creation_date::text, '')", - queryVectorSql, - semanticQueryText, - vectorWeight, - textWeight, - limit, - ) - : buildEmbeddingSimilarityQuery( - sessionsTable, - pathFilter, - "message::text", - 1, - "COALESCE(creation_date::text, '')", - queryVectorSql, - limit, - ); - return sessionsOnly - ? `SELECT path, content, source_order, creation_date FROM (${sessQuery}) AS combined ORDER BY path, source_order, creation_date` - : `SELECT path, content, source_order, creation_date FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order, creation_date`; - }; - const rows = await api.query(buildSemanticCombinedQuery()); - return rows.map(row => ({ - path: String(row["path"]), - content: String(row["content"] ?? ""), + const vectorQuery = buildScoredCombinedQuery( + sessionsOnly, + buildEmbeddingSimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + queryVectorSql, + limit, + ), + buildEmbeddingSimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + queryVectorSql, + limit, + ), + limit, + ); + + if (!useHybridRetrieval) { + const rows = await api.query(vectorQuery); + return rows.map(row => ({ + path: String(row["path"]), + content: String(row["content"] ?? 
""), + })); + } + + const lexicalQuery = buildScoredCombinedQuery( + sessionsOnly, + buildBm25SimilarityQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + lexicalQueryText, + limit, + ), + buildBm25SimilarityQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + lexicalQueryText, + limit, + ), + limit, + ); + const lexicalFallbackQuery = buildScoredCombinedQuery( + sessionsOnly, + buildHeuristicLexicalQuery( + memoryTable, + pathFilter, + "summary::text", + 0, + "''", + lexicalQueryText, + limit, + ), + buildHeuristicLexicalQuery( + sessionsTable, + pathFilter, + "message::text", + 1, + "COALESCE(creation_date::text, '')", + lexicalQueryText, + limit, + ), + limit, + ); + const [vectorRows, textRows] = await Promise.all([ + api.query(vectorQuery), + api.query(lexicalQuery).catch(() => api.query(lexicalFallbackQuery)), + ]); + return fuseRetrievalRows({ + textRows: mapScoredRows(textRows), + vectorRows: mapScoredRows(vectorRows), + textWeight, + vectorWeight, + limit, + }).map(row => ({ + path: row.path, + content: row.content, })); } @@ -434,7 +468,7 @@ export async function searchDeeplakeTables( const primaryQuery = buildCombinedQuery(primaryMemFilter, primarySessFilter, useSummaryBm25); const fallbackQuery = buildCombinedQuery(likeMemFilter, fallbackSessFilter, false); - const rows = shouldUseFallbackCapablePrimary + const rows = useSummaryBm25 ? 
await api.query(primaryQuery).catch(() => api.query(fallbackQuery)) : await api.query(primaryQuery); @@ -636,35 +670,73 @@ function buildEmbeddingSimilarityQuery( queryVectorSql: string, limit: number, ): string { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (embedding <#> ${queryVectorSql}) DESC LIMIT ${limit}`; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (embedding <#> ${queryVectorSql}) AS score FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY score DESC LIMIT ${limit}`; } -function buildHybridSimilarityQuery( +function buildBm25SimilarityQuery( + tableName: string, + pathFilter: string, + contentExpr: string, + sourceOrder: number, + creationDateExpr: string, + queryText: string, + limit: number, +): string { + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${contentExpr} <#> '${sqlStr(queryText)}') AS score FROM "${tableName}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT ${limit}`; +} + +function buildHeuristicLexicalQuery( tableName: string, pathFilter: string, contentExpr: string, sourceOrder: number, creationDateExpr: string, - queryVectorSql: string, queryText: string, - vectorWeight: number, - textWeight: number, limit: number, ): string { - return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date FROM "${tableName}" WHERE 1=1${pathFilter} AND embedding IS NOT NULL ORDER BY (((embedding, ${contentExpr})::deeplake_hybrid_record) <#> deeplake_hybrid_record(${queryVectorSql}, '${sqlStr(queryText)}', ${vectorWeight}, ${textWeight})) DESC LIMIT ${limit}`; + const terms = [...new Set( + queryText + .split(/\s+/) + .map((term) => term.trim()) + .filter((term) => term.length >= 
2), + )].slice(0, 8); + const clauses = terms.map((term) => `${contentExpr} ILIKE '%${sqlLike(term)}%'`); + const scoreTerms = [ + ...terms.map((term) => `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(term)}%' THEN 1 ELSE 0 END`), + `CASE WHEN ${contentExpr} ILIKE '%${sqlLike(queryText)}%' THEN ${Math.max(1, Math.min(terms.length, 4))} ELSE 0 END`, + ]; + const scoreExpr = scoreTerms.join(" + "); + const where = clauses.length > 0 ? ` AND (${clauses.join(" OR ")})` : ""; + return `SELECT path, ${contentExpr} AS content, ${sourceOrder} AS source_order, ${creationDateExpr} AS creation_date, (${scoreExpr})::float AS score FROM "${tableName}" WHERE 1=1${pathFilter}${where} ORDER BY score DESC LIMIT ${limit}`; +} + +function buildScoredCombinedQuery( + sessionsOnly: boolean, + memQuery: string, + sessQuery: string, + limit: number, +): string { + return sessionsOnly + ? `SELECT path, content, source_order, creation_date, score FROM (${sessQuery}) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}` + : `SELECT path, content, source_order, creation_date, score FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY score DESC, source_order, creation_date, path LIMIT ${limit}`; +} + +function mapScoredRows(rows: Record[]): ScoredRetrievalRow[] { + return rows.map((row) => ({ + path: String(row["path"] ?? ""), + content: String(row["content"] ?? ""), + sourceOrder: Number(row["source_order"] ?? 0), + creationDate: String(row["creation_date"] ?? ""), + score: Number.isFinite(Number(row["score"])) ? Number(row["score"]) : 0, + })); } export function toSqlRegexPattern( pattern: string, - ignoreCase: boolean, + _ignoreCase: boolean, ): string | null { if (!pattern) return null; - // Deeplake SQL supports `~` but not `~*`. For ignore-case regex searches, - // rely on LIKE/ILIKE prefilters plus in-memory regex refinement instead of - // pushing an incompatible SQL operator. 
- if (ignoreCase) return null; - try { new RegExp(pattern); return translateRegexPatternToSql(pattern); @@ -673,14 +745,6 @@ export function toSqlRegexPattern( } } -function isSqlRegexPushdownSafe(pattern: string): boolean { - // The managed backend rejects some otherwise valid JS regexes, especially - // patterns with bracket syntax, anchors, or escaped literals like `^\[`. - // Keep SQL regex pushdown to a conservative subset and rely on in-memory - // refinement after candidate fetch for everything else. - return !/[\\[\]{}^$]/.test(pattern) && !/\(\?/.test(pattern); -} - function unwrapWholeRegexGroup(pattern: string): string { if (!pattern.startsWith("(") || !pattern.endsWith(")")) return pattern; @@ -777,10 +841,9 @@ function buildRegexPredicate( ignoreCase: boolean, ): string { if (!pattern) return ""; - if (!isSqlRegexPushdownSafe(pattern)) return ""; const sqlPattern = toSqlRegexPattern(pattern, ignoreCase); if (!sqlPattern) return ""; - return `${column} ~ '${sqlStr(sqlPattern)}'`; + return `${column} ${ignoreCase ? "~*" : "~"} '${sqlStr(sqlPattern)}'`; } function joinAndPredicates(predicates: string[]): string { diff --git a/src/utils/hybrid-fusion.ts b/src/utils/hybrid-fusion.ts new file mode 100644 index 0000000..0941540 --- /dev/null +++ b/src/utils/hybrid-fusion.ts @@ -0,0 +1,127 @@ +export interface ScoredRetrievalRow { + path: string; + content: string; + sourceOrder: number; + creationDate: string; + score: number; +} + +export interface FusedRetrievalRow { + path: string; + content: string; + sourceOrder: number; + creationDate: string; + textScore: number; + vectorScore: number; + fusedScore: number; +} + +function coerceFinite(value: number): number { + return Number.isFinite(value) ? 
value : 0; +} + +function normalizeWeights(vectorWeight: number, textWeight: number): { vectorWeight: number; textWeight: number } { + const safeVector = Math.max(0, coerceFinite(vectorWeight)); + const safeText = Math.max(0, coerceFinite(textWeight)); + const total = safeVector + safeText; + if (total <= 0) return { vectorWeight: 0.5, textWeight: 0.5 }; + return { + vectorWeight: safeVector / total, + textWeight: safeText / total, + }; +} + +export function softmaxNormalizeScores(scores: number[]): number[] { + if (scores.length === 0) return []; + const safeScores = scores.map(coerceFinite); + const maxScore = Math.max(...safeScores); + const exps = safeScores.map((score) => Math.exp(score - maxScore)); + const sum = exps.reduce((acc, value) => acc + value, 0) || 1; + return exps.map((value) => value / sum); +} + +function pickPreferredRow(existing: ScoredRetrievalRow | undefined, candidate: ScoredRetrievalRow): ScoredRetrievalRow { + if (!existing) return candidate; + if (candidate.score > existing.score) return candidate; + if (candidate.score < existing.score) return existing; + if (candidate.sourceOrder < existing.sourceOrder) return candidate; + if (candidate.sourceOrder > existing.sourceOrder) return existing; + if (candidate.creationDate < existing.creationDate) return candidate; + if (candidate.creationDate > existing.creationDate) return existing; + return candidate.path < existing.path ? 
candidate : existing; +} + +function dedupeBestRows(rows: ScoredRetrievalRow[]): ScoredRetrievalRow[] { + const bestByPath = new Map(); + for (const row of rows) { + if (!row.path) continue; + bestByPath.set(row.path, pickPreferredRow(bestByPath.get(row.path), row)); + } + return [...bestByPath.values()]; +} + +export function fuseRetrievalRows(args: { + textRows: ScoredRetrievalRow[]; + vectorRows: ScoredRetrievalRow[]; + textWeight: number; + vectorWeight: number; + limit: number; +}): FusedRetrievalRow[] { + const { + textRows, + vectorRows, + limit, + } = args; + const { textWeight, vectorWeight } = normalizeWeights(args.vectorWeight, args.textWeight); + const dedupedTextRows = dedupeBestRows(textRows); + const dedupedVectorRows = dedupeBestRows(vectorRows); + const textNorm = softmaxNormalizeScores(dedupedTextRows.map((row) => row.score)); + const vectorNorm = softmaxNormalizeScores(dedupedVectorRows.map((row) => row.score)); + const fusedByPath = new Map(); + + for (let i = 0; i < dedupedTextRows.length; i++) { + const row = dedupedTextRows[i]; + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: textNorm[i] ?? 0, + vectorScore: 0, + fusedScore: textWeight * (textNorm[i] ?? 0), + }); + } + + for (let i = 0; i < dedupedVectorRows.length; i++) { + const row = dedupedVectorRows[i]; + const existing = fusedByPath.get(row.path); + const vectorScore = vectorNorm[i] ?? 
0; + if (existing) { + if (existing.content.length === 0 && row.content.length > 0) existing.content = row.content; + existing.sourceOrder = Math.min(existing.sourceOrder, row.sourceOrder); + if (!existing.creationDate || row.creationDate < existing.creationDate) existing.creationDate = row.creationDate; + existing.vectorScore = vectorScore; + existing.fusedScore = (textWeight * existing.textScore) + (vectorWeight * existing.vectorScore); + continue; + } + fusedByPath.set(row.path, { + path: row.path, + content: row.content, + sourceOrder: row.sourceOrder, + creationDate: row.creationDate, + textScore: 0, + vectorScore, + fusedScore: vectorWeight * vectorScore, + }); + } + + return [...fusedByPath.values()] + .sort((a, b) => + (b.fusedScore - a.fusedScore) + || (b.vectorScore - a.vectorScore) + || (b.textScore - a.textScore) + || (a.sourceOrder - b.sourceOrder) + || a.creationDate.localeCompare(b.creationDate) + || a.path.localeCompare(b.path)) + .slice(0, Math.max(0, limit)); +}