Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions packages/cli/src/commands/status.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,54 @@ test("status surfaces every group the repo belongs to, alphabetical", async () =
assert.match(groupsLine, /groups:\s+alpha, zeta$/);
assert.doesNotMatch(groupsLine, /unrelated/);
});

test("status reports bm25-only + summaries count from the retrieval probe", async () => {
  // Inject a probe that reports zero summaries and the "bm25-only" vector
  // state, then check that both values are echoed on their own output lines.
  const homeDir = await scratch();
  const repoDir = await seedRepo(homeDir, "bm25repo");
  const captured = captureStdout();
  try {
    await runStatus(repoDir, {
      home: homeDir,
      probeRetrieval: async () => ({ summaries: 0, vectors: "bm25-only" }),
    });
  } finally {
    // Always undo the stdout capture, even if runStatus throws.
    captured.restore();
  }

  const sawSummaries = captured.lines.some((line) => /^summaries:\s+0$/.test(line));
  assert.ok(sawSummaries, `expected 'summaries: 0'; got:\n${captured.lines.join("\n")}`);

  const sawVectors = captured.lines.some((line) => /^vectors:\s+bm25-only$/.test(line));
  assert.ok(sawVectors);
});

test("status reports populated vectors when the probe says so", async () => {
  // Inject a probe that reports 42 summaries and the "populated" vector state.
  const homeDir = await scratch();
  const repoDir = await seedRepo(homeDir, "hybridrepo");
  const captured = captureStdout();
  try {
    await runStatus(repoDir, {
      home: homeDir,
      probeRetrieval: async () => ({ summaries: 42, vectors: "populated" }),
    });
  } finally {
    // Always undo the stdout capture, even if runStatus throws.
    captured.restore();
  }

  const sawSummaries = captured.lines.some((line) => /^summaries:\s+42$/.test(line));
  assert.ok(sawSummaries);

  const sawVectors = captured.lines.some((line) => /^vectors:\s+populated$/.test(line));
  assert.ok(sawVectors);
});

test("status degrades to summaries:- / vectors:unknown when the store can't open", async () => {
  const homeDir = await scratch();
  const repoDir = await seedRepo(homeDir, "degraded");
  const captured = captureStdout();
  try {
    // Stubbed probe resolving to undefined — the value a probe uses to signal
    // that no retrieval state could be read. The default probe is NOT run here.
    await runStatus(repoDir, { home: homeDir, probeRetrieval: async () => undefined });
  } finally {
    // Always undo the stdout capture, even if runStatus throws.
    captured.restore();
  }

  const sawPlaceholderSummaries = captured.lines.some((line) => /^summaries:\s+-$/.test(line));
  assert.ok(sawPlaceholderSummaries);

  const sawUnknownVectors = captured.lines.some((line) => /^vectors:\s+unknown$/.test(line));
  assert.ok(sawUnknownVectors);

  // The rest of status still renders (groups line present).
  const sawGroups = captured.lines.some((line) => line.startsWith("groups:"));
  assert.ok(sawGroups);
});
54 changes: 54 additions & 0 deletions packages/cli/src/commands/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,48 @@
*/

import { resolve } from "node:path";
import { embeddingsPopulated } from "@opencodehub/search";
import { readStoreMeta } from "@opencodehub/storage";
import { listGroups } from "../groups.js";
import { readRegistry } from "../registry.js";
import { openStoreForCommand } from "./open-store.js";

/**
 * Retrieval-mode probe result printed by the status command.
 *
 * - `summaries` is the count of distinct nodes with an LLM summary
 *   (dense-leg input). A `null` count is allowed for probes that cannot
 *   determine one; the status output prints it as `-`.
 * - `vectors` reports whether the embeddings table is populated
 *   (`"populated"`) or not (`"bm25-only"`).
 *
 * Note: the default probe in this module does not produce `summaries: null`.
 * When the store cannot be opened or queried it resolves to `undefined`
 * instead of a `RetrievalState`, and the status output then shows
 * `summaries: -` / `vectors: unknown`.
 */
export interface RetrievalState {
// Distinct-node summary count, or null when the probe has no count to report.
readonly summaries: number | null;
// State of the embeddings table as seen by the probe.
readonly vectors: "populated" | "bm25-only";
}

/**
 * Optional knobs for `runStatus`. Every field may be omitted.
 */
export interface StatusOptions {
// NOTE(review): tests pass a scratch directory here; presumably this overrides
// the home directory used for registry/group lookups — confirm against runStatus.
readonly home?: string;
/**
 * Test seam: open a read-only store and return its retrieval state. Defaults
 * to opening the real composed store. Tests inject a stub so they don't need
 * a live graph.lbug on disk.
 *
 * Resolving to `undefined` means "no retrieval state available"; the status
 * output then prints `summaries: -` and `vectors: unknown` instead of failing.
 */
readonly probeRetrieval?: (repoPath: string) => Promise<RetrievalState | undefined>;
}

/**
 * Default retrieval probe for the status command: opens the repo's store
 * read-only and reports the symbol-summary count plus whether the embeddings
 * table is populated.
 *
 * Best-effort by design: any failure while opening, querying, or closing the
 * store is absorbed so the caller can print placeholder values instead of
 * aborting the whole status command.
 *
 * @param repoPath - Repository path handed to `openStoreForCommand`.
 * @returns The retrieval state, or `undefined` when the store could not be
 *   opened or queried.
 */
async function defaultProbeRetrieval(repoPath: string): Promise<RetrievalState | undefined> {
  let store: Awaited<ReturnType<typeof openStoreForCommand>>["store"] | undefined;
  try {
    const opened = await openStoreForCommand({ repo: repoPath, readOnly: true });
    // Remember the handle before querying so `finally` can release it even if
    // one of the queries below throws.
    store = opened.store;
    const summaries = await store.temporal.countSymbolSummaries();
    const populated = await embeddingsPopulated(store.graph);
    return { summaries, vectors: populated ? "populated" : "bm25-only" };
  } catch {
    // No index / degraded store / missing binding — caller degrades the
    // output rather than failing the whole status command.
    return undefined;
  } finally {
    try {
      await store?.close();
    } catch {
      // Deliberately ignored. An exception escaping `finally` would replace the
      // value returned above and reject the probe, failing the status command
      // over a cleanup error on a read-only handle.
    }
  }
}

export async function runStatus(path: string, opts: StatusOptions = {}): Promise<void> {
Expand All @@ -34,6 +70,24 @@ export async function runStatus(path: string, opts: StatusOptions = {}): Promise
console.log(`lastCommit: ${meta.lastCommit ?? "-"}`);
console.log(`nodes: ${meta.nodeCount}`);
console.log(`edges: ${meta.edgeCount}`);

// Retrieval mode. `query` runs BM25-only unless the embeddings table is
// populated AND the active embedder's modelId matches `meta.embedderModelId`
// — so report the embedder id from meta (no second probe) alongside the
// vector state, instead of implying hybrid will fire. Summaries are a
// distinct table (dense-leg context), not what gates BM25-vs-hybrid; we
// surface the count so an empty-summaries index is visible.
const probe = opts.probeRetrieval ?? defaultProbeRetrieval;
const retrieval = await probe(repoPath);
if (retrieval === undefined) {
console.log("summaries: -");
console.log("vectors: unknown");
} else {
console.log(`summaries: ${retrieval.summaries ?? "-"}`);
console.log(`vectors: ${retrieval.vectors}`);
}
console.log(`embedder: ${meta.embedderModelId ?? "none"}`);

if (registryHit === undefined) {
console.log("registry: missing — run `codehub analyze` to re-register");
} else {
Expand Down
19 changes: 19 additions & 0 deletions packages/storage/src/duckdb-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,25 @@ export class DuckDbStore implements ITemporalStore {
}
}

/**
 * Count the distinct `node_id` values present in `symbol_summaries`.
 *
 * Never throws: any error raised while obtaining the connection, preparing,
 * running, or destroying the statement is reported as 0 so that
 * `codehub status` degrades gracefully.
 */
async countSymbolSummaries(): Promise<number> {
  try {
    const prepared = await this.requireConn().prepare(
      "SELECT COUNT(DISTINCT node_id) AS n FROM symbol_summaries",
    );
    try {
      const rows = (await prepared.runAndReadAll()).getRowObjects();
      const row = rows[0] as Record<string, unknown> | undefined;
      const count = row?.["n"];
      // Accept the count as either a bigint or a number; anything else
      // (including a missing row) is treated as zero.
      if (typeof count === "bigint") return Number(count);
      if (typeof count === "number") return count;
      return 0;
    } finally {
      // Release the prepared statement whether or not the read succeeded.
      prepared.destroySync();
    }
  } catch {
    // Missing table / degraded store → report 0 rather than throwing, so
    // `codehub status` degrades gracefully.
    return 0;
  }
}

// --------------------------------------------------------------------------
// exec — read-only SQL escape hatch (codehub query --sql, MCP sql tool)
// --------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions packages/storage/src/interface.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ test("ITemporalStore-shaped value lacks graph methods at runtime", () => {
bulkLoadSymbolSummaries: async () => {},
lookupSymbolSummary: async () => undefined,
lookupSymbolSummariesByNode: async () => [],
countSymbolSummaries: async () => 0,
};

const bag = temporalOnly as unknown as Record<string, unknown>;
Expand Down
8 changes: 8 additions & 0 deletions packages/storage/src/interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,14 @@ export interface ITemporalStore {
* deterministically when more than one row per node is present.
*/
lookupSymbolSummariesByNode(nodeIds: readonly string[]): Promise<readonly SymbolSummaryRow[]>;
/**
* Count distinct nodes that have at least one summary row. Used by
* `codehub status` to report whether LLM symbol summaries were generated
* for this index (they feed the dense-retrieval leg). Returns 0 — never
* throws — when the table is missing or the store is degraded, so status
* degrades gracefully.
*/
countSymbolSummaries(): Promise<number>;
}

// ─────────────────────────────────────────────────────────────────────────────
Expand Down
Loading