diff --git a/packages/cli/src/commands/status.test.ts b/packages/cli/src/commands/status.test.ts index d39daeb..5c899c7 100644 --- a/packages/cli/src/commands/status.test.ts +++ b/packages/cli/src/commands/status.test.ts @@ -116,3 +116,54 @@ test("status surfaces every group the repo belongs to, alphabetical", async () = assert.match(groupsLine, /groups:\s+alpha, zeta$/); assert.doesNotMatch(groupsLine, /unrelated/); }); + +test("status reports bm25-only + summaries count from the retrieval probe", async () => { + const home = await scratch(); + const repoPath = await seedRepo(home, "bm25repo"); + const cap = captureStdout(); + try { + await runStatus(repoPath, { + home, + probeRetrieval: async () => ({ summaries: 0, vectors: "bm25-only" }), + }); + } finally { + cap.restore(); + } + assert.ok( + cap.lines.some((l) => /^summaries:\s+0$/.test(l)), + `expected 'summaries: 0'; got:\n${cap.lines.join("\n")}`, + ); + assert.ok(cap.lines.some((l) => /^vectors:\s+bm25-only$/.test(l))); +}); + +test("status reports populated vectors when the probe says so", async () => { + const home = await scratch(); + const repoPath = await seedRepo(home, "hybridrepo"); + const cap = captureStdout(); + try { + await runStatus(repoPath, { + home, + probeRetrieval: async () => ({ summaries: 42, vectors: "populated" }), + }); + } finally { + cap.restore(); + } + assert.ok(cap.lines.some((l) => /^summaries:\s+42$/.test(l))); + assert.ok(cap.lines.some((l) => /^vectors:\s+populated$/.test(l))); +}); + +test("status degrades to summaries:- / vectors:unknown when the store can't open", async () => { + const home = await scratch(); + const repoPath = await seedRepo(home, "degraded"); + const cap = captureStdout(); + try { + // Stub probe returns undefined, simulating a store that cannot be opened. 
+ await runStatus(repoPath, { home, probeRetrieval: async () => undefined }); + } finally { + cap.restore(); + } + assert.ok(cap.lines.some((l) => /^summaries:\s+-$/.test(l))); + assert.ok(cap.lines.some((l) => /^vectors:\s+unknown$/.test(l))); + // The rest of status still renders (groups line present). + assert.ok(cap.lines.some((l) => l.startsWith("groups:"))); +}); diff --git a/packages/cli/src/commands/status.ts b/packages/cli/src/commands/status.ts index 1390d5a..b15c903 100644 --- a/packages/cli/src/commands/status.ts +++ b/packages/cli/src/commands/status.ts @@ -8,12 +8,48 @@ */ import { resolve } from "node:path"; +import { embeddingsPopulated } from "@opencodehub/search"; import { readStoreMeta } from "@opencodehub/storage"; import { listGroups } from "../groups.js"; import { readRegistry } from "../registry.js"; +import { openStoreForCommand } from "./open-store.js"; + +/** + * Retrieval-mode probe result for the status output. `summaries` is the count + * of distinct nodes with an LLM summary (dense-leg input), or `null` when no + * count is available; `vectors` reports whether the embeddings table is + * populated. A probe that cannot read the store returns `undefined` instead. + */ +export interface RetrievalState { + readonly summaries: number | null; + readonly vectors: "populated" | "bm25-only"; +} export interface StatusOptions { readonly home?: string; + /** + * Test seam: open a read-only store and return its retrieval state. Defaults + * to opening the real composed store. Tests inject a stub so they don't need + * a live graph.lbug on disk. 
+ */ + readonly probeRetrieval?: (repoPath: string) => Promise; +} + +async function defaultProbeRetrieval(repoPath: string): Promise { + let store: Awaited>["store"] | undefined; + try { + const opened = await openStoreForCommand({ repo: repoPath, readOnly: true }); + store = opened.store; + const summaries = await store.temporal.countSymbolSummaries(); + const populated = await embeddingsPopulated(store.graph); + return { summaries, vectors: populated ? "populated" : "bm25-only" }; + } catch { + // No index / degraded store / missing binding — degrade output, do not fail. + // NOTE(review): a rejecting close() in `finally` below would still escape. + return undefined; + } finally { + await store?.close(); + } } export async function runStatus(path: string, opts: StatusOptions = {}): Promise { @@ -34,6 +70,24 @@ export async function runStatus(path: string, opts: StatusOptions = {}): Promise console.log(`lastCommit: ${meta.lastCommit ?? "-"}`); console.log(`nodes: ${meta.nodeCount}`); console.log(`edges: ${meta.edgeCount}`); + + // Retrieval mode. `query` runs BM25-only unless the embeddings table is + // populated AND the active embedder's modelId matches `meta.embedderModelId` + // — so report the embedder id from meta (no second probe) alongside the + // vector state, instead of implying hybrid will fire. Summaries are a + // distinct table (dense-leg context), not what gates BM25-vs-hybrid; we + // surface the count so an empty-summaries index is visible. + const probe = opts.probeRetrieval ?? defaultProbeRetrieval; + const retrieval = await probe(repoPath); + if (retrieval === undefined) { + console.log("summaries: -"); + console.log("vectors: unknown"); + } else { + console.log(`summaries: ${retrieval.summaries ?? "-"}`); + console.log(`vectors: ${retrieval.vectors}`); + } + console.log(`embedder: ${meta.embedderModelId ?? 
"none"}`); + if (registryHit === undefined) { console.log("registry: missing — run `codehub analyze` to re-register"); } else { diff --git a/packages/storage/src/duckdb-adapter.ts b/packages/storage/src/duckdb-adapter.ts index 522c036..d319c7d 100644 --- a/packages/storage/src/duckdb-adapter.ts +++ b/packages/storage/src/duckdb-adapter.ts @@ -329,6 +329,25 @@ export class DuckDbStore implements ITemporalStore { } } + async countSymbolSummaries(): Promise { + try { + const c = this.requireConn(); + const stmt = await c.prepare("SELECT COUNT(DISTINCT node_id) AS n FROM symbol_summaries"); + try { + const reader = await stmt.runAndReadAll(); + const first = reader.getRowObjects()[0] as Record | undefined; + const n = first?.["n"]; + return typeof n === "bigint" ? Number(n) : typeof n === "number" ? n : 0; + } finally { + stmt.destroySync(); + } + } catch { + // Missing table / degraded store → report 0 rather than throwing, so + // `codehub status` degrades gracefully. + return 0; + } + } + // -------------------------------------------------------------------------- // exec — read-only SQL escape hatch (codehub query --sql, MCP sql tool) // -------------------------------------------------------------------------- diff --git a/packages/storage/src/interface.test.ts b/packages/storage/src/interface.test.ts index 0e19b09..e3e42bd 100644 --- a/packages/storage/src/interface.test.ts +++ b/packages/storage/src/interface.test.ts @@ -122,6 +122,7 @@ test("ITemporalStore-shaped value lacks graph methods at runtime", () => { bulkLoadSymbolSummaries: async () => {}, lookupSymbolSummary: async () => undefined, lookupSymbolSummariesByNode: async () => [], + countSymbolSummaries: async () => 0, }; const bag = temporalOnly as unknown as Record; diff --git a/packages/storage/src/interface.ts b/packages/storage/src/interface.ts index b1371d0..3e37713 100644 --- a/packages/storage/src/interface.ts +++ b/packages/storage/src/interface.ts @@ -467,6 +467,14 @@ export interface 
ITemporalStore { * deterministically when more than one row per node is present. */ lookupSymbolSummariesByNode(nodeIds: readonly string[]): Promise; + /** + * Count distinct nodes that have at least one summary row. Used by + * `codehub status` to report whether LLM symbol summaries were generated + * for this index (they feed the dense-retrieval leg). Returns 0 — never + * throws — when the table is missing or the store is degraded, so status + * degrades gracefully. + */ + countSymbolSummaries(): Promise; } // ─────────────────────────────────────────────────────────────────────────────