From f50ff8ce8f0605a3170526597aad2a00aba7a571 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 18:19:59 -0600
Subject: [PATCH 01/10] feat(search): add configurable remote embedding
 provider for codegraph embed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lets codegraph embed call a self-hosted or third-party OpenAI-compatible
/embeddings endpoint instead of only the bundled local model. Set
embeddings.provider: "openai" and llm.baseUrl to point at any server
implementing that request/response shape (text-embeddings-inference, Ollama,
LM Studio, vLLM, etc.) — reuses the existing llm.apiKey/apiKeyCommand secret
resolution. No new npm dependencies; built on Node's global fetch.

README.md and docs/guides/configuration.md updates land in a later commit
on this branch, after the search-side fix (docs check acknowledged).

Closes #1713

Impact: 19 functions changed, 13 affected
---
 src/cli/commands/embed.ts                     |  25 ++-
 src/cli/commands/models.ts                    |   7 +
 src/domain/search/generator.ts                |  39 +++-
 src/domain/search/index.ts                    |   2 +
 src/domain/search/providers/remote.ts         | 125 +++++++++++++
 src/infrastructure/config.ts                  |  14 +-
 src/types.ts                                  |  10 ++
 .../search/embedding-remote-generator.test.ts |  77 ++++++++
 .../search/embedding-remote-provider.test.ts  | 169 ++++++++++++++++++
 tests/unit/config.test.ts                     |  11 +-
 tests/unit/embed-command.test.ts              | 117 ++++++++++++
 11 files changed, 584 insertions(+), 12 deletions(-)
 create mode 100644 src/domain/search/providers/remote.ts
 create mode 100644 tests/search/embedding-remote-generator.test.ts
 create mode 100644 tests/search/embedding-remote-provider.test.ts
 create mode 100644 tests/unit/embed-command.test.ts
diff --git a/src/cli/commands/embed.ts b/src/cli/commands/embed.ts
index 547ef8b11..0dd94c101 100644
--- a/src/cli/commands/embed.ts
+++ b/src/cli/commands/embed.ts
@@ -6,6 +6,7 @@ import {
   DEFAULT_MODEL,
   EMBEDDING_STRATEGIES,
   MODELS,
+  resolveRemoteEmbeddingOptions,
 } from '../../domain/search/index.js';
 import { info, warn } from '../../infrastructure/logger.js';
 import type { CommandDefinition } from '../types.js';
@@ -48,15 +49,29 @@ export const command: CommandDefinition = {
     ],
     ['-d, --db <path>', 'Path to graph.db'],
   ],
-  validate([_dir], opts) {
+  validate([_dir], opts, ctx) {
     if (!(EMBEDDING_STRATEGIES as readonly string[]).includes(opts.strategy)) {
       return `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`;
     }
+    const provider = ctx.config.embeddings?.provider ?? null;
+    if (provider && provider !== 'openai') {
+      return (
+        `Unsupported embeddings.provider "${provider}". Currently supported: "openai" ` +
+        '(any OpenAI-compatible /embeddings endpoint, including self-hosted servers).'
+      );
+    }
+    if (provider && !opts.model && !ctx.config.embeddings?.model) {
+      return (
+        `embeddings.provider is set to "${provider}" but no model is configured. ` +
+        'Set embeddings.model to the model identifier your endpoint expects, or pass --model.'
+      );
+    }
   },
   async execute([dir], opts, ctx) {
     const root = path.resolve(dir || '.');
     const dbPath = opts.db as string | undefined;
     const embeddingsConfig = ctx.config.embeddings;
+    const provider = embeddingsConfig?.provider ?? null;
     const flagModel = opts.model as string | undefined;
     const configModel = (embeddingsConfig?.model as string | null | undefined) ?? null;
 
@@ -65,6 +80,10 @@ export const command: CommandDefinition = {
       model = flagModel;
     } else if (configModel) {
       model = configModel;
+    } else if (provider) {
+      // Unreachable in practice — validate() rejects a provider with no model
+      // before execute() runs — but keeps this branch type-safe.
+      model = DEFAULT_MODEL;
     } else {
       const sticky = resolveStickyModel(dbPath);
       if (sticky) {
@@ -77,6 +96,8 @@ export const command: CommandDefinition = {
       }
     }
 
-    await buildEmbeddings(root, model, dbPath, { strategy: opts.strategy });
+    const remote =
+      provider === 'openai' ? resolveRemoteEmbeddingOptions(ctx.config, model) : undefined;
+    await buildEmbeddings(root, model, dbPath, { strategy: opts.strategy, remote });
   },
 };
diff --git a/src/cli/commands/models.ts b/src/cli/commands/models.ts
index e575e36ee..f24fa23ea 100644
--- a/src/cli/commands/models.ts
+++ b/src/cli/commands/models.ts
@@ -7,6 +7,13 @@ export const command: CommandDefinition = {
   execute(_args, _opts, ctx) {
     const embeddingsConfig = ctx.config.embeddings;
     const defaultModel = (embeddingsConfig?.model as string) || DEFAULT_MODEL;
+
+    if (embeddingsConfig?.provider) {
+      console.log(
+        `\nembeddings.provider is set to "${embeddingsConfig.provider}" — codegraph embed will call ` +
+          `model "${embeddingsConfig.model}" at llm.baseUrl instead of a local model below.`,
+      );
+    }
     console.log('\nAvailable embedding models:\n');
 
     interface ModelEntry {
diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts
index 02e43f1ca..9cf5d25ee 100644
--- a/src/domain/search/generator.ts
+++ b/src/domain/search/generator.ts
@@ -5,6 +5,11 @@ import { warn } from '../../infrastructure/logger.js';
 import { DbError } from '../../shared/errors.js';
 import type { BetterSqlite3Database, NodeRow } from '../../types.js';
 import { embed, getModelConfig } from './models.js';
+import {
+  DEFAULT_REMOTE_CONTEXT_WINDOW,
+  embedRemote,
+  type RemoteEmbeddingOptions,
+} from './providers/remote.js';
 import { buildSourceText } from './strategies/source.js';
 import { buildStructuredText } from './strategies/structured.js';
 
@@ -195,6 +200,13 @@ function persistEmbeddings(
 
 export interface BuildEmbeddingsOptions {
   strategy?: EmbeddingStrategy;
+  /**
+   * When set, embeddings are generated via a remote OpenAI-compatible
+   * endpoint instead of the local bundled model. `modelKey` is then treated
+   * as an opaque model identifier passed straight to the endpoint, not a
+   * local registry key.
+   */
+  remote?: RemoteEmbeddingOptions;
 }
 
 /**
@@ -225,12 +237,21 @@ export async function buildEmbeddings(
   const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
   console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
 
-  const config = getModelConfig(modelKey);
-  const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
+  let contextWindow: number;
+  let displayName: string;
+  if (options.remote) {
+    contextWindow = DEFAULT_REMOTE_CONTEXT_WINDOW;
+    displayName = options.remote.model;
+  } else {
+    const modelConfig = getModelConfig(modelKey);
+    contextWindow = modelConfig.contextWindow;
+    displayName = modelConfig.name;
+  }
+  const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, contextWindow);
 
   if (prepared.overflowCount > 0) {
     warn(
-      `${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`,
+      `${prepared.overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
     );
   }
 
@@ -247,13 +268,17 @@ export async function buildEmbeddings(
     );
   }
 
-  console.log(`Embedding ${prepared.texts.length} symbols...`);
-  const { vectors, dim } = await embed(prepared.texts, modelKey);
+  console.log(
+    `Embedding ${prepared.texts.length} symbols${options.remote ? ` via remote provider (${displayName})` : ''}...`,
+  );
+  const { vectors, dim } = options.remote
+    ? await embedRemote(prepared.texts, options.remote)
+    : await embed(prepared.texts, modelKey);
 
-  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, config.name, strategy);
+  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, displayName, strategy);
 
   console.log(
-    `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
+    `\nStored ${vectors.length} embeddings (${dim}d, ${displayName}, strategy: ${strategy}) in graph.db`,
   );
   closeDb(db);
 }
diff --git a/src/domain/search/index.ts b/src/domain/search/index.ts
index dc3ba85c1..6b7c355c6 100644
--- a/src/domain/search/index.ts
+++ b/src/domain/search/index.ts
@@ -8,6 +8,8 @@ export type { BuildEmbeddingsOptions } from './generator.js';
 export { buildEmbeddings, estimateTokens } from './generator.js';
 export type { ModelConfig } from './models.js';
 export { DEFAULT_MODEL, disposeModel, EMBEDDING_STRATEGIES, embed, MODELS } from './models.js';
+export type { RemoteEmbeddingOptions } from './providers/remote.js';
+export { embedRemote, resolveRemoteEmbeddingOptions } from './providers/remote.js';
 export { search } from './search/cli-formatter.js';
 export { hybridSearchData } from './search/hybrid.js';
 export { ftsSearchData } from './search/keyword.js';
diff --git a/src/domain/search/providers/remote.ts b/src/domain/search/providers/remote.ts
new file mode 100644
index 000000000..b6ba46c73
--- /dev/null
+++ b/src/domain/search/providers/remote.ts
@@ -0,0 +1,173 @@
+import { ConfigError, EngineError } from '../../../shared/errors.js';
+import type { CodegraphConfig } from '../../../types.js';
+
+/** Batch size for remote `/embeddings` requests. Conservative default — most
+ * OpenAI-compatible servers accept much larger batches, but this keeps
+ * individual request bodies and timeouts predictable across unknown hosts. */
+const REMOTE_BATCH_SIZE = 32;
+
+/**
+ * Context window assumed for remote models when truncating oversized symbols.
+ * Remote model context limits aren't known ahead of time (unlike the local
+ * registry in `models.ts`), so this is a fixed assumption rather than a
+ * per-model lookup. It is not a guarantee: an endpoint serving a model with a
+ * smaller window may still reject or truncate inputs that fit this limit.
+ */
+export const DEFAULT_REMOTE_CONTEXT_WINDOW = 8192;
+
+/** Connection details for a remote OpenAI-compatible `/embeddings` endpoint. */
+export interface RemoteEmbeddingOptions {
+  /** Endpoint root (e.g. `http://localhost:8080/v1`) or the full `/embeddings` URL. */
+  baseUrl: string;
+  /** Model identifier sent verbatim in the request body. */
+  model: string;
+  /** Bearer token; the `Authorization` header is omitted when empty or unset. */
+  apiKey?: string | null;
+}
+
+/** One entry of the `data` array in an OpenAI-shaped embeddings response. */
+interface OpenAIEmbeddingItem {
+  embedding: number[];
+  index: number;
+}
+
+/** The subset of the OpenAI embeddings response body that this module reads. */
+interface OpenAIEmbeddingResponse {
+  data: OpenAIEmbeddingItem[];
+}
+
+/** Strip trailing slashes from `baseUrl` and append `/embeddings` unless already present. */
+function embeddingsEndpoint(baseUrl: string): string {
+  const trimmed = baseUrl.replace(/\/+$/, '');
+  return trimmed.endsWith('/embeddings') ? trimmed : `${trimmed}/embeddings`;
+}
+
+/**
+ * Resolve the remote embedding endpoint config from `llm.*`, given the
+ * already-resolved model identifier (from `--model` / `embeddings.model`).
+ * Throws a ConfigError if `llm.baseUrl` isn't set — there's no sensible
+ * default host for a self-hosted endpoint.
+ */
+export function resolveRemoteEmbeddingOptions(
+  config: Pick<CodegraphConfig, 'llm'>,
+  model: string,
+): RemoteEmbeddingOptions {
+  const baseUrl = config.llm.baseUrl;
+  if (!baseUrl) {
+    throw new ConfigError(
+      'embeddings.provider is "openai" but llm.baseUrl is not set. ' +
+        'Point it at your embeddings endpoint, e.g. "http://localhost:8080/v1" ' +
+        '(config key "llm.baseUrl" or env var CODEGRAPH_LLM_BASE_URL).',
+    );
+  }
+  return { baseUrl, model, apiKey: config.llm.apiKey };
+}
+
+/**
+ * Generate embeddings via a remote OpenAI-compatible `/embeddings` endpoint.
+ * Works with OpenAI itself and any self-hosted server implementing the same
+ * request/response shape (text-embeddings-inference, Ollama, LM Studio, vLLM).
+ *
+ * Texts are sent sequentially in batches of `REMOTE_BATCH_SIZE`; results are
+ * appended batch by batch, ordered within a batch by the reported `index`.
+ *
+ * @param texts - Strings to embed; an empty array returns without a request.
+ * @param options - Endpoint URL, model identifier and optional API key.
+ * @returns The vectors and their shared dimension (`dim` is 0 for empty input).
+ * @throws EngineError if the endpoint is unreachable, answers with a non-2xx
+ *   status, or returns a body that is not the expected embeddings payload.
+ */
+export async function embedRemote(
+  texts: string[],
+  options: RemoteEmbeddingOptions,
+): Promise<{ vectors: Float32Array[]; dim: number }> {
+  if (texts.length === 0) return { vectors: [], dim: 0 };
+
+  const url = embeddingsEndpoint(options.baseUrl);
+  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
+  if (options.apiKey) headers.Authorization = `Bearer ${options.apiKey}`;
+
+  const results: Float32Array[] = [];
+  let dim = 0;
+
+  for (let i = 0; i < texts.length; i += REMOTE_BATCH_SIZE) {
+    const batch = texts.slice(i, i + REMOTE_BATCH_SIZE);
+
+    let response: Response;
+    try {
+      response = await fetch(url, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify({ model: options.model, input: batch }),
+      });
+    } catch (err: unknown) {
+      throw new EngineError(
+        `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`,
+        { cause: err instanceof Error ? err : undefined },
+      );
+    }
+
+    if (!response.ok) {
+      const body = await response.text().catch(() => '');
+      throw new EngineError(
+        `Remote embedding endpoint ${url} returned ${response.status} ${response.statusText}` +
+          (body ? `: ${body.slice(0, 500)}` : ''),
+      );
+    }
+
+    // A 2xx status does not guarantee a JSON body, so report parse failures as
+    // an EngineError like every other endpoint failure instead of a raw parse error.
+    let json: OpenAIEmbeddingResponse | null;
+    try {
+      json = (await response.json()) as OpenAIEmbeddingResponse | null;
+    } catch (err: unknown) {
+      throw new EngineError(
+        `Remote embedding endpoint ${url} returned a body that is not valid JSON: ` +
+          (err instanceof Error ? err.message : String(err)),
+        { cause: err instanceof Error ? err : undefined },
+      );
+    }
+
+    // `json` itself may be null (a literal `null` body), hence the optional chain.
+    const data = json?.data;
+    if (!Array.isArray(data) || data.length !== batch.length) {
+      throw new EngineError(
+        `Remote embedding endpoint ${url} returned an unexpected response shape ` +
+          `(expected ${batch.length} embeddings, got ${data?.length ?? 0})`,
+      );
+    }
+
+    // Reject malformed items before reading them: `Float32Array.from` would turn a
+    // non-array `embedding` (e.g. a string) into garbage values rather than throwing.
+    for (const item of data) {
+      if (!Array.isArray(item?.embedding) || item.embedding.length === 0) {
+        throw new EngineError(
+          `Remote embedding endpoint ${url} returned an item without an "embedding" array`,
+        );
+      }
+    }
+
+    // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index.
+    const sorted = [...data].sort((a, b) => a.index - b.index);
+    for (const item of sorted) {
+      const vec = Float32Array.from(item.embedding);
+      if (dim === 0) dim = vec.length;
+      // Every vector, across all batches, must share the first vector's dimension.
+      if (vec.length !== dim) {
+        throw new EngineError(
+          `Remote embedding endpoint ${url} returned vectors of inconsistent dimension ` +
+            `(expected ${dim}, got ${vec.length})`,
+        );
+      }
+      results.push(vec);
+    }
+
+    if (texts.length > REMOTE_BATCH_SIZE) {
+      process.stderr.write(
+        `  Embedded ${Math.min(i + REMOTE_BATCH_SIZE, texts.length)}/${texts.length}\r`,
+      );
+    }
+  }
+
+  return { vectors: results, dim };
+}
diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts
index 1e5492ee9..7958aa6a1 100644
--- a/src/infrastructure/config.ts
+++ b/src/infrastructure/config.ts
@@ -38,7 +38,11 @@ export const DEFAULTS = {
     defaultLimit: 20,
     excludeTests: false,
   },
-  embeddings: { model: null as string | null, llmProvider: null as string | null },
+  embeddings: {
+    model: null as string | null,
+    llmProvider: null as string | null,
+    provider: null as string | null,
+  },
   llm: {
     provider: null as string | null,
     model: null as string | null,
@@ -676,7 +680,12 @@ export function loadConfigWithProvenance(
   }
 
   // Layer 3+: env overrides (LLM keys)
-  const ENV_LLM_KEYS = ['CODEGRAPH_LLM_PROVIDER', 'CODEGRAPH_LLM_API_KEY', 'CODEGRAPH_LLM_MODEL'];
+  const ENV_LLM_KEYS = [
+    'CODEGRAPH_LLM_PROVIDER',
+    'CODEGRAPH_LLM_API_KEY',
+    'CODEGRAPH_LLM_MODEL',
+    'CODEGRAPH_LLM_BASE_URL',
+  ];
   if (ENV_LLM_KEYS.some((k) => process.env[k] !== undefined)) {
     provenance.llm = 'env';
   }
@@ -688,6 +697,7 @@ const ENV_LLM_MAP: Record<string, string> = {
   CODEGRAPH_LLM_PROVIDER: 'provider',
   CODEGRAPH_LLM_API_KEY: 'apiKey',
   CODEGRAPH_LLM_MODEL: 'model',
+  CODEGRAPH_LLM_BASE_URL: 'baseUrl',
 };
 
 export function applyEnvOverrides(config: CodegraphConfig): CodegraphConfig {
diff --git a/src/types.ts b/src/types.ts
index 40bcb1b4e..3fcabd8b1 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1408,6 +1408,16 @@ export interface CodegraphConfig {
   embeddings: {
     model: string | null;
     llmProvider: string | null;
+    /**
+     * Embedding backend for `codegraph embed`. `null` (default) uses the
+     * local bundled model via `@huggingface/transformers`. `"openai"` calls
+     * a remote OpenAI-compatible `/embeddings` endpoint configured via
+     * `llm.baseUrl` — this covers self-hosted servers (text-embeddings-inference,
+     * Ollama, LM Studio, vLLM, etc.) that implement the same request/response
+     * shape, not just OpenAI itself. When set, `embeddings.model` must be the
+     * model identifier the endpoint expects.
+     */
+    provider: string | null;
   };
 
   llm: {
diff --git a/tests/search/embedding-remote-generator.test.ts b/tests/search/embedding-remote-generator.test.ts
new file mode 100644
index 000000000..28862be9f
--- /dev/null
+++ b/tests/search/embedding-remote-generator.test.ts
@@ -0,0 +1,77 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import Database from 'better-sqlite3';
+import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi } from 'vitest';
+import { initSchema } from '../../src/db/index.js';
+import { buildEmbeddings } from '../../src/domain/search/index.js';
+
+// buildEmbeddings must never touch @huggingface/transformers on the remote
+// path — mocking it to throw proves the remote branch doesn't fall through
+// to the local loader.
+vi.mock('@huggingface/transformers', () => {
+  throw new Error('local transformers pipeline should not be loaded on the remote path');
+});
+
+function insertNode(db, name, kind, file, line, endLine) {
+  return db
+    .prepare('INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)')
+    .run(name, kind, file, line, endLine).lastInsertRowid;
+}
+
+describe('buildEmbeddings with a remote provider', () => {
+  let tmpDir: string, dbPath: string;
+  const fetchMock = vi.fn();
+
+  beforeAll(() => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-remote-embed-'));
+    fs.writeFileSync(path.join(tmpDir, 'math.js'), 'export function add(a, b) { return a + b; }\n');
+
+    const dbDir = path.join(tmpDir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    dbPath = path.join(dbDir, 'graph.db');
+
+    const db = new Database(dbPath);
+    db.pragma('journal_mode = WAL');
+    initSchema(db);
+    insertNode(db, 'add', 'function', 'math.js', 1, 1);
+    db.close();
+  });
+
+  afterAll(() => {
+    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  beforeEach(() => {
+    vi.stubGlobal('fetch', fetchMock);
+    fetchMock.mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ embedding: [0.1, 0.2, 0.3, 0.4], index: 0 }] }), {
+        status: 200,
+      }),
+    );
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    fetchMock.mockReset();
+  });
+
+  test('dispatches to the remote endpoint and persists its response', async () => {
+    await buildEmbeddings(tmpDir, 'my-remote-model', dbPath, {
+      remote: { baseUrl: 'http://localhost:9999/v1', model: 'my-remote-model', apiKey: 'sk-x' },
+    });
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    expect(fetchMock.mock.calls[0][0]).toBe('http://localhost:9999/v1/embeddings');
+
+    const db = new Database(dbPath, { readonly: true });
+    const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c;
+    const modelMeta = db.prepare("SELECT value FROM embedding_meta WHERE key = 'model'").get();
+    const dimMeta = db.prepare("SELECT value FROM embedding_meta WHERE key = 'dim'").get();
+    db.close();
+
+    expect(count).toBe(1);
+    expect(modelMeta.value).toBe('my-remote-model');
+    expect(dimMeta.value).toBe('4');
+  });
+});
diff --git a/tests/search/embedding-remote-provider.test.ts b/tests/search/embedding-remote-provider.test.ts
new file mode 100644
index 000000000..c1910a271
--- /dev/null
+++ b/tests/search/embedding-remote-provider.test.ts
@@ -0,0 +1,169 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  embedRemote,
+  resolveRemoteEmbeddingOptions,
+} from '../../src/domain/search/providers/remote.js';
+import { ConfigError, EngineError } from '../../src/shared/errors.js';
+
+describe('resolveRemoteEmbeddingOptions', () => {
+  it('builds options from llm config', () => {
+    const options = resolveRemoteEmbeddingOptions(
+      {
+        llm: {
+          provider: 'openai',
+          model: null,
+          baseUrl: 'http://localhost:8080/v1',
+          apiKey: 'sk-test',
+          apiKeyCommand: null,
+        },
+      },
+      'my-embed-model',
+    );
+    expect(options).toEqual({
+      baseUrl: 'http://localhost:8080/v1',
+      model: 'my-embed-model',
+      apiKey: 'sk-test',
+    });
+  });
+
+  it('throws ConfigError when llm.baseUrl is not set', () => {
+    expect(() =>
+      resolveRemoteEmbeddingOptions(
+        {
+          llm: {
+            provider: 'openai',
+            model: null,
+            baseUrl: null,
+            apiKey: null,
+            apiKeyCommand: null,
+          },
+        },
+        'my-embed-model',
+      ),
+    ).toThrow(ConfigError);
+  });
+});
+
+describe('embedRemote', () => {
+  const fetchMock = vi.fn();
+
+  beforeEach(() => {
+    vi.stubGlobal('fetch', fetchMock);
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    fetchMock.mockReset();
+  });
+
+  it('returns an empty result without a network call for empty input', async () => {
+    const result = await embedRemote([], { baseUrl: 'http://localhost:8080/v1', model: 'm' });
+    expect(result).toEqual({ vectors: [], dim: 0 });
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  it('posts to <baseUrl>/embeddings and parses an OpenAI-shaped response', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(
+        JSON.stringify({
+          data: [
+            { embedding: [0.1, 0.2, 0.3], index: 0 },
+            { embedding: [0.4, 0.5, 0.6], index: 1 },
+          ],
+        }),
+        { status: 200, headers: { 'Content-Type': 'application/json' } },
+      ),
+    );
+
+    const result = await embedRemote(['a', 'b'], {
+      baseUrl: 'http://localhost:8080/v1',
+      model: 'my-model',
+      apiKey: 'sk-test',
+    });
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const [url, init] = fetchMock.mock.calls[0];
+    expect(url).toBe('http://localhost:8080/v1/embeddings');
+    expect(init.method).toBe('POST');
+    expect(init.headers.Authorization).toBe('Bearer sk-test');
+    expect(JSON.parse(init.body)).toEqual({ model: 'my-model', input: ['a', 'b'] });
+
+    expect(result.dim).toBe(3);
+    expect(result.vectors).toHaveLength(2);
+    // Compare against Float32-rounded expectations — embedRemote stores vectors
+    // as Float32Array, which loses precision relative to the JSON doubles.
+    expect(Array.from(result.vectors[0])).toEqual(Array.from(Float32Array.from([0.1, 0.2, 0.3])));
+    expect(Array.from(result.vectors[1])).toEqual(Array.from(Float32Array.from([0.4, 0.5, 0.6])));
+  });
+
+  it('does not double up when baseUrl already ends with /embeddings', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ data: [{ embedding: [1], index: 0 }] }), { status: 200 }),
+    );
+    await embedRemote(['x'], { baseUrl: 'http://localhost:8080/v1/embeddings', model: 'm' });
+    expect(fetchMock.mock.calls[0][0]).toBe('http://localhost:8080/v1/embeddings');
+  });
+
+  it('sorts response items by index to restore input order', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(
+        JSON.stringify({
+          data: [
+            { embedding: [2], index: 1 },
+            { embedding: [1], index: 0 },
+          ],
+        }),
+        { status: 200 },
+      ),
+    );
+    const result = await embedRemote(['a', 'b'], { baseUrl: 'http://x', model: 'm' });
+    expect(Array.from(result.vectors[0])).toEqual([1]);
+    expect(Array.from(result.vectors[1])).toEqual([2]);
+  });
+
+  it('omits the Authorization header when no apiKey is configured', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ data: [{ embedding: [1], index: 0 }] }), { status: 200 }),
+    );
+    await embedRemote(['a'], { baseUrl: 'http://x', model: 'm' });
+    const [, init] = fetchMock.mock.calls[0];
+    expect(init.headers.Authorization).toBeUndefined();
+  });
+
+  it('batches requests larger than the batch size', async () => {
+    const texts = Array.from({ length: 40 }, (_, i) => `text-${i}`);
+    fetchMock.mockImplementation(async (_url, init) => {
+      const body = JSON.parse(init.body);
+      const data = body.input.map((_text: string, i: number) => ({ embedding: [1], index: i }));
+      return new Response(JSON.stringify({ data }), { status: 200 });
+    });
+    const result = await embedRemote(texts, { baseUrl: 'http://x', model: 'm' });
+    expect(fetchMock).toHaveBeenCalledTimes(2); // 32 + 8
+    expect(result.vectors).toHaveLength(40);
+  });
+
+  it('throws EngineError on a non-2xx response', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response('bad request', { status: 400, statusText: 'Bad Request' }),
+    );
+    await expect(embedRemote(['a'], { baseUrl: 'http://x', model: 'm' })).rejects.toThrow(
+      EngineError,
+    );
+  });
+
+  it('throws EngineError when the response shape does not match the input length', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ data: [{ embedding: [1], index: 0 }] }), { status: 200 }),
+    );
+    await expect(embedRemote(['a', 'b'], { baseUrl: 'http://x', model: 'm' })).rejects.toThrow(
+      EngineError,
+    );
+  });
+
+  it('throws EngineError when the network request itself fails', async () => {
+    fetchMock.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+    await expect(embedRemote(['a'], { baseUrl: 'http://x', model: 'm' })).rejects.toThrow(
+      EngineError,
+    );
+  });
+});
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
index b5e2211a5..7ba43397d 100644
--- a/tests/unit/config.test.ts
+++ b/tests/unit/config.test.ts
@@ -58,7 +58,7 @@ describe('DEFAULTS', () => {
   });
 
   it('has embeddings defaults', () => {
-    expect(DEFAULTS.embeddings).toEqual({ model: null, llmProvider: null });
+    expect(DEFAULTS.embeddings).toEqual({ model: null, llmProvider: null, provider: null });
   });
 
   it('has llm defaults', () => {
@@ -328,6 +328,7 @@ describe('applyEnvOverrides', () => {
     'CODEGRAPH_LLM_PROVIDER',
     'CODEGRAPH_LLM_API_KEY',
     'CODEGRAPH_LLM_MODEL',
+    'CODEGRAPH_LLM_BASE_URL',
     'CODEGRAPH_ENGINE',
     'CODEGRAPH_FAST_SKIP_DIAG',
   ];
@@ -362,6 +363,14 @@ describe('applyEnvOverrides', () => {
     expect(config.llm.model).toBe('gpt-4');
   });
 
+  it('overrides llm.baseUrl from env', () => {
+    process.env.CODEGRAPH_LLM_BASE_URL = 'http://localhost:8080/v1';
+    const config = applyEnvOverrides({
+      llm: { provider: null, model: null, baseUrl: null, apiKey: null },
+    });
+    expect(config.llm.baseUrl).toBe('http://localhost:8080/v1');
+  });
+
   it('env vars take priority over file config', () => {
     process.env.CODEGRAPH_LLM_PROVIDER = 'anthropic';
     const dir = fs.mkdtempSync(path.join(tmpDir, 'env-priority-'));
diff --git a/tests/unit/embed-command.test.ts b/tests/unit/embed-command.test.ts
new file mode 100644
index 000000000..482b4e995
--- /dev/null
+++ b/tests/unit/embed-command.test.ts
@@ -0,0 +1,117 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+vi.mock('../../src/domain/search/index.js', async (importOriginal) => {
+  const actual = await importOriginal();
+  return { ...actual, buildEmbeddings: vi.fn() };
+});
+vi.mock('../../src/db/index.js', () => ({
+  openReadonlyOrFail: vi.fn(() => {
+    throw new Error('no db in this test');
+  }),
+}));
+vi.mock('../../src/db/repository/embeddings.js', () => ({ getEmbeddingMeta: vi.fn() }));
+
+const { command } = await import('../../src/cli/commands/embed.js');
+const { buildEmbeddings } = await import('../../src/domain/search/index.js');
+
+function fakeCtx(embeddings: Record<string, unknown>, llm: Record<string, unknown> = {}) {
+  return {
+    config: {
+      embeddings: { model: null, llmProvider: null, provider: null, ...embeddings },
+      llm: {
+        provider: null,
+        model: null,
+        baseUrl: null,
+        apiKey: null,
+        apiKeyCommand: null,
+        ...llm,
+      },
+    },
+  } as never;
+}
+
+describe('embed command validate()', () => {
+  it('rejects an unknown strategy', () => {
+    const err = command.validate!([undefined], { strategy: 'bogus' } as never, fakeCtx({}));
+    expect(err).toMatch(/Unknown strategy/);
+  });
+
+  it('rejects an unsupported embeddings.provider', () => {
+    const err = command.validate!(
+      [undefined],
+      { strategy: 'structured' } as never,
+      fakeCtx({ provider: 'anthropic' }),
+    );
+    expect(err).toMatch(/Unsupported embeddings.provider/);
+  });
+
+  it('rejects provider "openai" with no model configured', () => {
+    const err = command.validate!(
+      [undefined],
+      { strategy: 'structured' } as never,
+      fakeCtx({ provider: 'openai' }),
+    );
+    expect(err).toMatch(/no model is configured/);
+  });
+
+  it('accepts provider "openai" with a config model', () => {
+    const err = command.validate!(
+      [undefined],
+      { strategy: 'structured' } as never,
+      fakeCtx({ provider: 'openai', model: 'text-embedding-3-small' }),
+    );
+    expect(err).toBeUndefined();
+  });
+
+  it('accepts provider "openai" with a --model flag', () => {
+    const err = command.validate!(
+      [undefined],
+      { strategy: 'structured', model: 'text-embedding-3-small' } as never,
+      fakeCtx({ provider: 'openai' }),
+    );
+    expect(err).toBeUndefined();
+  });
+
+  it('accepts no provider at all', () => {
+    const err = command.validate!([undefined], { strategy: 'structured' } as never, fakeCtx({}));
+    expect(err).toBeUndefined();
+  });
+});
+
+describe('embed command execute()', () => {
+  beforeEach(() => {
+    vi.mocked(buildEmbeddings).mockClear();
+  });
+
+  afterEach(() => {
+    vi.mocked(buildEmbeddings).mockReset();
+  });
+
+  it('passes a resolved remote config through to buildEmbeddings when provider is "openai"', async () => {
+    const ctx = fakeCtx(
+      { provider: 'openai', model: 'text-embedding-3-small' },
+      { baseUrl: 'http://localhost:8080/v1', apiKey: 'sk-test' },
+    );
+
+    await command.execute!([undefined], { strategy: 'structured' } as never, ctx);
+
+    expect(buildEmbeddings).toHaveBeenCalledTimes(1);
+    const [, model, , options] = vi.mocked(buildEmbeddings).mock.calls[0]!;
+    expect(model).toBe('text-embedding-3-small');
+    expect(options.remote).toEqual({
+      baseUrl: 'http://localhost:8080/v1',
+      model: 'text-embedding-3-small',
+      apiKey: 'sk-test',
+    });
+  });
+
+  it('does not build a remote config when no provider is set', async () => {
+    const ctx = fakeCtx({ model: 'minilm' });
+
+    await command.execute!([undefined], { strategy: 'structured' } as never, ctx);
+
+    expect(buildEmbeddings).toHaveBeenCalledTimes(1);
+    const [, , , options] = vi.mocked(buildEmbeddings).mock.calls[0]!;
+    expect(options.remote).toBeUndefined();
+  });
+});

From 565ec44795a2acebb36a055681ffaf1a89e64c61 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 18:20:12 -0600
Subject: [PATCH 02/10] fix(search): route semantic search queries through the
 configured remote embedding provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

codegraph search (semantic/hybrid modes) and the semantic_search MCP tool
always embedded the query text with the local model, even when the stored
embeddings were built via a remote provider. That produced a dimension
mismatch or nonsense similarity scores instead of actually querying the
remote endpoint. Query embedding now uses the same provider that produced
the stored embeddings.

docs check acknowledged — README.md/configuration.md land in the next commit.

Impact: 5 functions changed, 9 affected
---
 src/domain/search/search/prepare.ts          |  5 +-
 src/domain/search/search/semantic.ts         | 33 ++++++--
 tests/search/embedding-remote-search.test.ts | 89 ++++++++++++++++++++
 3 files changed, 121 insertions(+), 6 deletions(-)
 create mode 100644 tests/search/embedding-remote-search.test.ts

diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts
index 3907aa5b6..f5da7dc1e 100644
--- a/src/domain/search/search/prepare.ts
+++ b/src/domain/search/search/prepare.ts
@@ -20,6 +20,9 @@ export interface PreparedSearch {
   }>;
   modelKey: string | null;
   storedDim: number | null;
+  /** Raw model identifier recorded at embed time — set even when it isn't a
+   * local registry key (e.g. a remote provider's model name). */
+  storedModel: string | null;
 }
 
 export interface PrepareSearchOpts {
@@ -87,7 +90,7 @@ export function prepareSearch(
     let rows = db.prepare(sql).all(...params) as PreparedSearch['rows'];
     rows = applyFilters(rows, opts);
 
-    return { db, rows, modelKey, storedDim };
+    return { db, rows, modelKey, storedDim, storedModel };
   } catch (err) {
     db.close();
     throw err;
diff --git a/src/domain/search/search/semantic.ts b/src/domain/search/search/semantic.ts
index 2c0b82616..f376183fe 100644
--- a/src/domain/search/search/semantic.ts
+++ b/src/domain/search/search/semantic.ts
@@ -2,10 +2,33 @@ import { loadConfig } from '../../../infrastructure/config.js';
 import { warn } from '../../../infrastructure/logger.js';
 import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js';
 import { normalizeSymbol } from '../../queries.js';
-import { embed } from '../models.js';
+import { embed, MODELS } from '../models.js';
+import { embedRemote, resolveRemoteEmbeddingOptions } from '../providers/remote.js';
 import { cosineSim } from '../stores/sqlite-blob.js';
 import { type PreparedSearch, prepareSearch } from './prepare.js';
 
+/**
+ * Embed query text with whichever backend produced the stored embeddings.
+ * `modelKey` is a resolved local registry key (from `--model` or matched
+ * against `MODELS`), or an arbitrary identifier (an explicit `--model`
+ * override, or unmatched) when embeddings were built via a remote provider.
+ */
+async function embedQuery(
+  texts: string[],
+  config: CodegraphConfig,
+  modelKey: string | null,
+  storedModel: string | null,
+): Promise<{ vectors: Float32Array[]; dim: number }> {
+  const isKnownLocalModel = modelKey != null && modelKey in MODELS;
+  if (!isKnownLocalModel && config.embeddings?.provider === 'openai') {
+    const remoteModel = modelKey || storedModel;
+    if (remoteModel) {
+      return embedRemote(texts, resolveRemoteEmbeddingOptions(config, remoteModel));
+    }
+  }
+  return embed(texts, modelKey ?? undefined);
+}
+
 export interface SemanticSearchOpts {
   config?: CodegraphConfig;
   limit?: number;
@@ -61,13 +84,13 @@ export async function searchData(
 
   const prepared = prepareSearch(customDbPath, opts);
   if (!prepared) return null;
-  const { db, rows, modelKey, storedDim } = prepared;
+  const { db, rows, modelKey, storedDim, storedModel } = prepared;
 
   try {
     const {
       vectors: [queryVec],
       dim,
-    } = await embed([query], modelKey ?? undefined);
+    } = await embedQuery([query], config, modelKey, storedModel);
 
     if (checkDimensionMismatch(storedDim, dim)) return null;
 
@@ -192,10 +215,10 @@ export async function multiSearchData(
 
   const prepared = prepareSearch(customDbPath, opts);
   if (!prepared) return null;
-  const { db, rows, modelKey, storedDim } = prepared;
+  const { db, rows, modelKey, storedDim, storedModel } = prepared;
 
   try {
-    const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined);
+    const { vectors: queryVecs, dim } = await embedQuery(queries, config, modelKey, storedModel);
 
     warnOnSimilarQueries(queries, queryVecs as Float32Array[], similarityWarnThreshold);
 
diff --git a/tests/search/embedding-remote-search.test.ts b/tests/search/embedding-remote-search.test.ts
new file mode 100644
index 000000000..e1ef8fad4
--- /dev/null
+++ b/tests/search/embedding-remote-search.test.ts
@@ -0,0 +1,89 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import Database from 'better-sqlite3';
+import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi } from 'vitest';
+import { initSchema } from '../../src/db/index.js';
+import { buildEmbeddings, searchData } from '../../src/domain/search/index.js';
+
+// buildEmbeddings/searchData must never touch @huggingface/transformers when a
+// remote provider is configured for both the index and query embedding steps.
+vi.mock('@huggingface/transformers', () => {
+  throw new Error('local transformers pipeline should not be loaded on the remote path');
+});
+
+function insertNode(db, name, kind, file, line, endLine) {
+  return db
+    .prepare('INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)')
+    .run(name, kind, file, line, endLine).lastInsertRowid;
+}
+
+describe('semantic search against remotely-built embeddings', () => {
+  let tmpDir: string, dbPath: string;
+  const fetchMock = vi.fn();
+  const config = {
+    embeddings: { model: 'my-remote-model', llmProvider: null, provider: 'openai' },
+    llm: {
+      provider: null,
+      model: null,
+      baseUrl: 'http://localhost:9999/v1',
+      apiKey: 'sk-x',
+      apiKeyCommand: null,
+    },
+    search: { defaultMinScore: 0, rrfK: 60, topK: 15, similarityWarnThreshold: 0.85 },
+  } as never;
+
+  beforeAll(() => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-remote-search-'));
+    fs.writeFileSync(path.join(tmpDir, 'math.js'), 'export function add(a, b) { return a + b; }\n');
+
+    const dbDir = path.join(tmpDir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    dbPath = path.join(dbDir, 'graph.db');
+
+    const db = new Database(dbPath);
+    db.pragma('journal_mode = WAL');
+    initSchema(db);
+    insertNode(db, 'add', 'function', 'math.js', 1, 1);
+    db.close();
+  });
+
+  afterAll(() => {
+    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  beforeEach(() => {
+    vi.stubGlobal('fetch', fetchMock);
+    // Every call (index or query) gets the same fixed vector, so the indexed
+    // symbol always scores a perfect match against any query.
+    fetchMock.mockImplementation(async (_url, init) => {
+      const body = JSON.parse(init.body);
+      const data = body.input.map((_text: string, i: number) => ({
+        embedding: [1, 0, 0, 0],
+        index: i,
+      }));
+      return new Response(JSON.stringify({ data }), { status: 200 });
+    });
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    fetchMock.mockReset();
+  });
+
+  test('query embedding is routed to the remote provider, not the local model', async () => {
+    await buildEmbeddings(tmpDir, 'my-remote-model', dbPath, {
+      remote: { baseUrl: 'http://localhost:9999/v1', model: 'my-remote-model', apiKey: 'sk-x' },
+    });
+
+    const result = await searchData('addition helper', dbPath, { config });
+
+    expect(result).not.toBeNull();
+    expect(result!.results.map((r) => r.name)).toContain('add');
+    // One call to build the index embedding, one to embed the query.
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+    for (const call of fetchMock.mock.calls) {
+      expect(call[0]).toBe('http://localhost:9999/v1/embeddings');
+    }
+  });
+});

From 7348b2650ad84be04e963f8e9ef5776dd536d07e Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 18:20:20 -0600
Subject: [PATCH 03/10] docs: document the remote embedding provider option

Covers embeddings.provider/llm.baseUrl config, the CODEGRAPH_LLM_BASE_URL
env override, and that codegraph search auto-routes queries through the
same remote endpoint.
---
 README.md                    |  2 +-
 docs/guides/configuration.md | 27 +++++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f54c56f5c..45c960cc8 100644
--- a/README.md
+++ b/README.md
@@ -398,7 +398,7 @@ codegraph cycles --functions   # Function-level cycles
 
 ### Semantic Search
 
-Local embeddings for every function, method, and class — search by natural language. Everything runs locally using [@huggingface/transformers](https://huggingface.co/docs/transformers.js) — no API keys needed.
+Local embeddings for every function, method, and class — search by natural language. Everything runs locally using [@huggingface/transformers](https://huggingface.co/docs/transformers.js) — no API keys needed. Prefer a remote or self-hosted model instead? Set `embeddings.provider: "openai"` and `llm.baseUrl` in your config to call any OpenAI-compatible `/embeddings` endpoint — see [configuration.md](docs/guides/configuration.md#embeddings-embeddings).
 
 ```bash
 codegraph embed                # Build embeddings (default: nomic)
diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
index f3d34c17f..2b163217a 100644
--- a/docs/guides/configuration.md
+++ b/docs/guides/configuration.md
@@ -173,12 +173,34 @@ Defaults applied to graph queries when the CLI flag is omitted.
 
 ## Embeddings (`embeddings`)
 
-Controls the local embedding model used by `codegraph embed` and `codegraph search`.
+Controls the embedding backend used by `codegraph embed` and `codegraph search`.
 
 | Key | Type | Default | Purpose |
 |-----|------|---------|---------|
-| `model` | `string \| null` | `null` | Model registry key (see `src/domain/search/models.ts`). When `null`, `codegraph embed` reuses the model already stored in the database, or falls back to the built-in default (`"nomic"`) for fresh graphs. Common options: `"nomic"`, `"nomic-v1.5"`, `"bge-large"`. |
+| `model` | `string \| null` | `null` | When `provider` is `null` (local, default): a model registry key (see `src/domain/search/models.ts`). When `null`, `codegraph embed` reuses the model already stored in the database, or falls back to the built-in default (`"nomic"`) for fresh graphs. Common options: `"nomic"`, `"nomic-v1.5"`, `"bge-large"`. When `provider` is `"openai"`: the model identifier your endpoint expects (e.g. `"text-embedding-3-small"`, or whatever name your self-hosted server registers) — required in that case. |
 | `llmProvider` | `string \| null` | `null` | Optional LLM provider for query expansion. `null` disables it. |
+| `provider` | `string \| null` | `null` | Embedding backend. `null` (default) uses the local bundled model via `@huggingface/transformers`. `"openai"` calls a remote OpenAI-compatible `/embeddings` endpoint configured via `llm.baseUrl` — this covers self-hosted servers (text-embeddings-inference, Ollama, LM Studio, vLLM, etc.), not just OpenAI itself. |
+
+### Remote embedding provider
+
+Point `codegraph embed` at a self-hosted or third-party embedding endpoint instead of downloading a local model:
+
+```json
+{
+  "embeddings": {
+    "provider": "openai",
+    "model": "my-embedding-model"
+  },
+  "llm": {
+    "baseUrl": "http://my-tailnet-host:8080/v1",
+    "apiKeyCommand": "op read op://vault/embeddings/api-key"
+  }
+}
+```
+
+The endpoint must accept `POST <baseUrl>/embeddings` with `{ "model": "...", "input": ["text", ...] }` and return `{ "data": [{ "embedding": [...], "index": 0 }, ...] }` — the same shape OpenAI's API uses. `llm.apiKey`/`llm.apiKeyCommand` are optional; omit them for endpoints that don't require auth. Vector dimensionality is read from the response, so there's no model registry to keep in sync.
+
+`codegraph search` (semantic and hybrid modes) and the `semantic_search` MCP tool automatically embed the query through the same remote endpoint when the stored embeddings were built with `embeddings.provider: "openai"` — no extra configuration needed.
 
 ---
 
@@ -215,6 +237,7 @@ These env vars override the corresponding `llm.*` fields when set:
 - `CODEGRAPH_LLM_PROVIDER` → `llm.provider`
 - `CODEGRAPH_LLM_MODEL` → `llm.model`
 - `CODEGRAPH_LLM_API_KEY` → `llm.apiKey`
+- `CODEGRAPH_LLM_BASE_URL` → `llm.baseUrl`
 
 ---
 

From 68cddb66d7ee417d5b1224986cac5f067a88d17d Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 19:32:49 -0600
Subject: [PATCH 04/10] docs: cross-link llm credentials section to the remote
 embedding provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The llm.* fields (baseUrl, apiKey, apiKeyCommand) are now reused by
embeddings.provider: "openai", not just chat-completion features —
the LLM credentials section didn't mention that reuse.
---
 docs/guides/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
index 2b163217a..f75efb8f4 100644
--- a/docs/guides/configuration.md
+++ b/docs/guides/configuration.md
@@ -206,7 +206,7 @@ The endpoint must accept `POST <baseUrl>/embeddings` with `{ "model": "...", "in
 
 ## LLM credentials (`llm`)
 
-Used by features that call out to a chat-completion API (e.g. query expansion). Codegraph never hardcodes a provider — you pick one.
+Used by features that call out to a chat-completion API (e.g. query expansion), and reused by the [remote embedding provider](#embeddings-embeddings) (`baseUrl`, `apiKey`, `apiKeyCommand`) so credentials aren't duplicated across features. Codegraph never hardcodes a provider — you pick one.
 
 | Key | Type | Default | Purpose |
 |-----|------|---------|---------|

From 91eb0da9c70eae12c1ae2ada86529ffe843ef0a5 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 19:39:57 -0600
Subject: [PATCH 05/10] fix: add request timeout and cross-batch dimension
 validation to remote embedding provider (#1716)

Greptile review: embedRemote's fetch calls had no timeout, so an
unresponsive self-hosted server would hang the process indefinitely with
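no actionable error. Wrap each batch request in an AbortController with a
configurable ceiling (llm.requestTimeoutMs, default 120s) and throw an
EngineError naming the endpoint, the configured timeout, and the batch
number on abort. The timer is cleared as soon as fetch() settles, so it
bounds the wait for a response, not the subsequent body read.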

Also validate that every vector in a response has the same dimension as
the first one seen; a misbehaving server that returns mixed-length
embeddings across (or within) a batch now fails fast with a clear error
instead of silently corrupting `dim` and producing garbage similarity
scores or a TypedArray range error far from the source.

Impact: 4 functions changed, 9 affected
---
 docs/guides/configuration.md                  |  1 +
 src/domain/search/providers/remote.ts         | 38 ++++++++++++++++++-
 src/infrastructure/config.ts                  |  1 +
 src/types.ts                                  |  6 +++
 .../search/embedding-remote-provider.test.ts  | 35 +++++++++++++++++
 tests/unit/config.test.ts                     |  1 +
 6 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
index f75efb8f4..02e08c5d6 100644
--- a/docs/guides/configuration.md
+++ b/docs/guides/configuration.md
@@ -215,6 +215,7 @@ Used by features that call out to a chat-completion API (e.g. query expansion),
 | `baseUrl` | `string \| null` | `null` | Override the provider's base URL (for compatible proxies, local servers, etc.). |
 | `apiKey` | `string \| null` | `null` | Plaintext API key. Prefer `apiKeyCommand` or env vars over this. |
 | `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). 10s timeout, 64 KB max output. |
+| `requestTimeoutMs` | `number` | `120000` | Per-request timeout for remote HTTP calls made against `baseUrl` (currently the [remote embedding provider](#embeddings-embeddings)). Aborts and throws if a self-hosted server hangs mid-request instead of blocking indefinitely. |
 
 Resolution order (first non-empty wins): `apiKeyCommand` output → `CODEGRAPH_LLM_API_KEY` env var → `apiKey` field.
 
diff --git a/src/domain/search/providers/remote.ts b/src/domain/search/providers/remote.ts
index b6ba46c73..372484f86 100644
--- a/src/domain/search/providers/remote.ts
+++ b/src/domain/search/providers/remote.ts
@@ -18,8 +18,17 @@ export interface RemoteEmbeddingOptions {
   baseUrl: string;
   model: string;
   apiKey?: string | null;
+  /** Per-request timeout in ms. Defaults to `DEFAULT_REQUEST_TIMEOUT_MS` when omitted. */
+  timeoutMs?: number;
 }
 
+/**
+ * Fallback per-request timeout when `RemoteEmbeddingOptions.timeoutMs` isn't
+ * supplied (e.g. direct `embedRemote` calls that bypass config resolution).
+ * Mirrors `DEFAULTS.llm.requestTimeoutMs` in `infrastructure/config.ts`.
+ */
+const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;
+
 interface OpenAIEmbeddingItem {
   embedding: number[];
   index: number;
@@ -52,7 +61,12 @@ export function resolveRemoteEmbeddingOptions(
         '(config key "llm.baseUrl" or env var CODEGRAPH_LLM_BASE_URL).',
     );
   }
-  return { baseUrl, model, apiKey: config.llm.apiKey };
+  return {
+    baseUrl,
+    model,
+    apiKey: config.llm.apiKey,
+    timeoutMs: config.llm.requestTimeoutMs,
+  };
 }
 
 /**
@@ -71,23 +85,36 @@ export async function embedRemote(
   if (options.apiKey) headers.Authorization = `Bearer ${options.apiKey}`;
 
   const results: Float32Array[] = [];
+  const timeoutMs = options.timeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
   let dim = 0;
 
   for (let i = 0; i < texts.length; i += REMOTE_BATCH_SIZE) {
     const batch = texts.slice(i, i + REMOTE_BATCH_SIZE);
 
+    const controller = new AbortController();
+    const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
+
     let response: Response;
     try {
       response = await fetch(url, {
         method: 'POST',
         headers,
         body: JSON.stringify({ model: options.model, input: batch }),
+        signal: controller.signal,
       });
     } catch (err: unknown) {
+      if (err instanceof Error && err.name === 'AbortError') {
+        throw new EngineError(
+          `Remote embedding endpoint ${url} did not respond within ${timeoutMs}ms ` +
+            `(batch ${Math.floor(i / REMOTE_BATCH_SIZE) + 1})`,
+        );
+      }
       throw new EngineError(
         `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`,
         { cause: err instanceof Error ? err : undefined },
       );
+    } finally {
+      clearTimeout(timeoutHandle);
     }
 
     if (!response.ok) {
@@ -110,7 +137,14 @@ export async function embedRemote(
     const sorted = [...json.data].sort((a, b) => a.index - b.index);
     for (const item of sorted) {
       const vec = Float32Array.from(item.embedding);
-      if (dim === 0) dim = vec.length;
+      if (dim === 0) {
+        dim = vec.length;
+      } else if (vec.length !== dim) {
+        throw new EngineError(
+          `Remote embedding endpoint ${url} returned inconsistent vector dimensions ` +
+            `(expected ${dim}, got ${vec.length} for response item index ${item.index})`,
+        );
+      }
       results.push(vec);
     }
 
diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts
index 7958aa6a1..c8a146873 100644
--- a/src/infrastructure/config.ts
+++ b/src/infrastructure/config.ts
@@ -49,6 +49,7 @@ export const DEFAULTS = {
     baseUrl: null as string | null,
     apiKey: null as string | null,
     apiKeyCommand: null as string | null,
+    requestTimeoutMs: 120_000,
   },
   search: { defaultMinScore: 0.2, rrfK: 60, topK: 15, similarityWarnThreshold: 0.85 },
   ci: { failOnCycles: false, impactThreshold: null as number | null },
diff --git a/src/types.ts b/src/types.ts
index 3fcabd8b1..9f85a9f33 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1433,6 +1433,12 @@ export interface CodegraphConfig {
      * values are rejected with a `ConfigError` at load time.
      */
     apiKeyCommand: string | null;
+    /**
+     * Per-request timeout (ms) for remote HTTP calls made against `llm.baseUrl`
+     * (currently the remote embedding provider). Prevents an unresponsive
+     * self-hosted server from hanging the process indefinitely. Default: 120000.
+     */
+    requestTimeoutMs: number;
   };
 
   search: {
diff --git a/tests/search/embedding-remote-provider.test.ts b/tests/search/embedding-remote-provider.test.ts
index c1910a271..63f143140 100644
--- a/tests/search/embedding-remote-provider.test.ts
+++ b/tests/search/embedding-remote-provider.test.ts
@@ -15,6 +15,7 @@ describe('resolveRemoteEmbeddingOptions', () => {
           baseUrl: 'http://localhost:8080/v1',
           apiKey: 'sk-test',
           apiKeyCommand: null,
+          requestTimeoutMs: 120_000,
         },
       },
       'my-embed-model',
@@ -23,6 +24,7 @@ describe('resolveRemoteEmbeddingOptions', () => {
       baseUrl: 'http://localhost:8080/v1',
       model: 'my-embed-model',
       apiKey: 'sk-test',
+      timeoutMs: 120_000,
     });
   });
 
@@ -36,6 +38,7 @@ describe('resolveRemoteEmbeddingOptions', () => {
             baseUrl: null,
             apiKey: null,
             apiKeyCommand: null,
+            requestTimeoutMs: 120_000,
           },
         },
         'my-embed-model',
@@ -166,4 +169,36 @@ describe('embedRemote', () => {
       EngineError,
     );
   });
+
+  it('aborts and throws EngineError when a request exceeds timeoutMs', async () => {
+    fetchMock.mockImplementation((_url, init: { signal: AbortSignal }) => {
+      return new Promise((_resolve, reject) => {
+        init.signal.addEventListener('abort', () => {
+          const err = new Error('This operation was aborted');
+          err.name = 'AbortError';
+          reject(err);
+        });
+      });
+    });
+    await expect(
+      embedRemote(['a'], { baseUrl: 'http://x', model: 'm', timeoutMs: 10 }),
+    ).rejects.toThrow(/did not respond within 10ms/);
+  });
+
+  it('throws EngineError when a later item has a different vector dimension than earlier items', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(
+        JSON.stringify({
+          data: [
+            { embedding: [1, 2, 3], index: 0 },
+            { embedding: [1, 2], index: 1 },
+          ],
+        }),
+        { status: 200 },
+      ),
+    );
+    await expect(embedRemote(['a', 'b'], { baseUrl: 'http://x', model: 'm' })).rejects.toThrow(
+      /inconsistent vector dimensions/,
+    );
+  });
 });
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
index 7ba43397d..7979a6313 100644
--- a/tests/unit/config.test.ts
+++ b/tests/unit/config.test.ts
@@ -68,6 +68,7 @@ describe('DEFAULTS', () => {
       baseUrl: null,
       apiKey: null,
       apiKeyCommand: null,
+      requestTimeoutMs: 120_000,
     });
   });
 

From c383e9bb4af45c2ca97745bc1567a60d22f7568c Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 19:40:08 -0600
Subject: [PATCH 06/10] fix: route query embeddings using embed-time provider
 metadata, not live config (#1716)

Greptile review: embedQuery() decided which backend to call by inspecting
config.embeddings?.provider at search time rather than what actually
produced the stored embeddings. If a DB was embedded with provider:
"openai" and the config later drifted (e.g. the field was cleared on a
different machine), routing silently fell back to the local model. When
the local and remote models happen to share an output dimension, the
existing dimension-mismatch guard wouldn't catch it, and search would
compute cosine similarity between incompatible vector spaces without any
error.

Record the provider ("openai" or omitted for local) in embedding_meta at
persist time, thread it through prepareSearch as `storedProvider`, and
key embedQuery's routing decision off that instead of the live config.
The live config is still used to resolve where to send the request
(llm.baseUrl/apiKey), just not whether to send it there at all.

Impact: 7 functions changed, 11 affected
---
 src/domain/search/generator.ts               | 14 ++++++++++-
 src/domain/search/search/prepare.ts          |  9 ++++++-
 src/domain/search/search/semantic.ts         | 25 ++++++++++++++++----
 tests/search/embedding-remote-search.test.ts | 25 ++++++++++++++++++++
 4 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts
index 9cf5d25ee..d9b47cf0a 100644
--- a/src/domain/search/generator.ts
+++ b/src/domain/search/generator.ts
@@ -172,6 +172,7 @@ function persistEmbeddings(
   dim: number,
   modelName: string,
   strategy: EmbeddingStrategy,
+  provider: string | null,
 ): void {
   const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
   const insert = db.prepare(
@@ -194,6 +195,12 @@ function persistEmbeddings(
     if (overflowCount > 0) {
       insertMeta.run('truncated_count', String(overflowCount));
     }
+    // Record which backend produced these vectors so search-time routing
+    // (`embedQuery` in `search/semantic.ts`) can key off embed-time truth
+    // instead of the live config, which may have drifted since `embed` ran.
+    if (provider) {
+      insertMeta.run('provider', provider);
+    }
   });
   insertAll();
 }
@@ -275,7 +282,12 @@ export async function buildEmbeddings(
     ? await embedRemote(prepared.texts, options.remote)
     : await embed(prepared.texts, modelKey);
 
-  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, displayName, strategy);
+  // Only "openai" (OpenAI-compatible /embeddings) is currently supported as a
+  // remote provider — `options.remote` being set implies it. Recorded so
+  // search-time routing doesn't have to trust the live config (see
+  // `embedQuery` in `search/semantic.ts`).
+  const provider = options.remote ? 'openai' : null;
+  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, displayName, strategy, provider);
 
   console.log(
     `\nStored ${vectors.length} embeddings (${dim}d, ${displayName}, strategy: ${strategy}) in graph.db`,
diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts
index f5da7dc1e..a28330ced 100644
--- a/src/domain/search/search/prepare.ts
+++ b/src/domain/search/search/prepare.ts
@@ -23,6 +23,12 @@ export interface PreparedSearch {
   /** Raw model identifier recorded at embed time — set even when it isn't a
    * local registry key (e.g. a remote provider's model name). */
   storedModel: string | null;
+  /**
+   * Embedding backend recorded at embed time (e.g. `"openai"`), or `null` for
+   * the local bundled model. Search-time routing must key off this rather
+   * than the live config — the config may have changed since `embed` ran.
+   */
+  storedProvider: string | null;
 }
 
 export interface PrepareSearchOpts {
@@ -47,6 +53,7 @@ export function prepareSearch(
     }
 
     const storedModel = getEmbeddingMeta(db, 'model') || null;
+    const storedProvider = getEmbeddingMeta(db, 'provider') || null;
     const dimStr = getEmbeddingMeta(db, 'dim');
     const storedDim = dimStr ? parseInt(dimStr, 10) : null;
 
@@ -90,7 +97,7 @@ export function prepareSearch(
     let rows = db.prepare(sql).all(...params) as PreparedSearch['rows'];
     rows = applyFilters(rows, opts);
 
-    return { db, rows, modelKey, storedDim, storedModel };
+    return { db, rows, modelKey, storedDim, storedModel, storedProvider };
   } catch (err) {
     db.close();
     throw err;
diff --git a/src/domain/search/search/semantic.ts b/src/domain/search/search/semantic.ts
index f376183fe..07f8f4bb2 100644
--- a/src/domain/search/search/semantic.ts
+++ b/src/domain/search/search/semantic.ts
@@ -12,15 +12,24 @@ import { type PreparedSearch, prepareSearch } from './prepare.js';
  * `modelKey` is a resolved local registry key (from `--model` or matched
  * against `MODELS`), or an arbitrary identifier (an explicit `--model`
  * override, or unmatched) when embeddings were built via a remote provider.
+ *
+ * Routing is decided from `storedProvider` — the provider recorded in
+ * `embedding_meta` at embed time — rather than the live config. If the
+ * config drifted after `embed` ran (e.g. `embeddings.provider` unset later
+ * on a different machine), trusting live config here would silently route
+ * the query through the wrong backend instead of the one that actually
+ * produced the stored vectors, which can produce misleading similarity
+ * scores rather than an obvious error.
  */
 async function embedQuery(
   texts: string[],
   config: CodegraphConfig,
   modelKey: string | null,
   storedModel: string | null,
+  storedProvider: string | null,
 ): Promise<{ vectors: Float32Array[]; dim: number }> {
   const isKnownLocalModel = modelKey != null && modelKey in MODELS;
-  if (!isKnownLocalModel && config.embeddings?.provider === 'openai') {
+  if (!isKnownLocalModel && storedProvider === 'openai') {
     const remoteModel = modelKey || storedModel;
     if (remoteModel) {
       return embedRemote(texts, resolveRemoteEmbeddingOptions(config, remoteModel));
@@ -84,13 +93,13 @@ export async function searchData(
 
   const prepared = prepareSearch(customDbPath, opts);
   if (!prepared) return null;
-  const { db, rows, modelKey, storedDim, storedModel } = prepared;
+  const { db, rows, modelKey, storedDim, storedModel, storedProvider } = prepared;
 
   try {
     const {
       vectors: [queryVec],
       dim,
-    } = await embedQuery([query], config, modelKey, storedModel);
+    } = await embedQuery([query], config, modelKey, storedModel, storedProvider);
 
     if (checkDimensionMismatch(storedDim, dim)) return null;
 
@@ -215,10 +224,16 @@ export async function multiSearchData(
 
   const prepared = prepareSearch(customDbPath, opts);
   if (!prepared) return null;
-  const { db, rows, modelKey, storedDim, storedModel } = prepared;
+  const { db, rows, modelKey, storedDim, storedModel, storedProvider } = prepared;
 
   try {
-    const { vectors: queryVecs, dim } = await embedQuery(queries, config, modelKey, storedModel);
+    const { vectors: queryVecs, dim } = await embedQuery(
+      queries,
+      config,
+      modelKey,
+      storedModel,
+      storedProvider,
+    );
 
     warnOnSimilarQueries(queries, queryVecs as Float32Array[], similarityWarnThreshold);
 
diff --git a/tests/search/embedding-remote-search.test.ts b/tests/search/embedding-remote-search.test.ts
index e1ef8fad4..5c1c7850e 100644
--- a/tests/search/embedding-remote-search.test.ts
+++ b/tests/search/embedding-remote-search.test.ts
@@ -86,4 +86,29 @@ describe('semantic search against remotely-built embeddings', () => {
       expect(call[0]).toBe('http://localhost:9999/v1/embeddings');
     }
   });
+
+  test('query embedding still routes remotely when embeddings.provider config drifts after embed', async () => {
+    await buildEmbeddings(tmpDir, 'my-remote-model', dbPath, {
+      remote: { baseUrl: 'http://localhost:9999/v1', model: 'my-remote-model', apiKey: 'sk-x' },
+    });
+
+    // Simulate config drift: whoever/whatever runs `search` no longer has
+    // embeddings.provider set to "openai" (e.g. cleared on a CI machine, or a
+    // different .codegraphrc.json applies). Routing must still honor the
+    // provider recorded in embedding_meta at embed time, not this live value
+    // — otherwise the query would silently fall back to the local model.
+    const driftedConfig = {
+      ...config,
+      embeddings: { model: 'my-remote-model', llmProvider: null, provider: null },
+    } as never;
+
+    const result = await searchData('addition helper', dbPath, { config: driftedConfig });
+
+    expect(result).not.toBeNull();
+    expect(result!.results.map((r) => r.name)).toContain('add');
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+    for (const call of fetchMock.mock.calls) {
+      expect(call[0]).toBe('http://localhost:9999/v1/embeddings');
+    }
+  });
 });

From f1030adf168665608d2f058400fc52ba3ae593a3 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 19:42:48 -0600
Subject: [PATCH 07/10] fix: validate the embedding field before
 Float32Array.from() in remote provider (#1716)

Greptile review: the response-shape check only verified json.data.length
matched the batch size, not that each item actually carried a valid
embedding array. A conforming-length response with one malformed entry
(e.g. { "index": 0 } with no embedding) made Float32Array.from(undefined)
throw a raw TypeError instead of the wrapped EngineError used everywhere
else in this module, producing a confusing, untraceable failure.

Impact: 1 functions changed, 9 affected
---
 src/domain/search/providers/remote.ts          |  6 ++++++
 tests/search/embedding-remote-provider.test.ts | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/domain/search/providers/remote.ts b/src/domain/search/providers/remote.ts
index 372484f86..545a1b1d9 100644
--- a/src/domain/search/providers/remote.ts
+++ b/src/domain/search/providers/remote.ts
@@ -136,6 +136,12 @@ export async function embedRemote(
     // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index.
     const sorted = [...json.data].sort((a, b) => a.index - b.index);
     for (const item of sorted) {
+      if (!Array.isArray(item.embedding)) {
+        throw new EngineError(
+          `Remote embedding endpoint ${url} returned an item with a missing or non-array ` +
+            `"embedding" field (index ${item.index})`,
+        );
+      }
       const vec = Float32Array.from(item.embedding);
       if (dim === 0) {
         dim = vec.length;
diff --git a/tests/search/embedding-remote-provider.test.ts b/tests/search/embedding-remote-provider.test.ts
index 63f143140..43591cad6 100644
--- a/tests/search/embedding-remote-provider.test.ts
+++ b/tests/search/embedding-remote-provider.test.ts
@@ -201,4 +201,14 @@ describe('embedRemote', () => {
       /inconsistent vector dimensions/,
     );
   });
+
+  it('throws EngineError instead of a raw TypeError when an item is missing the embedding field', async () => {
+    // Right item count but no `embedding` key: the case that hit Float32Array.from(undefined).
+    const body = JSON.stringify({ data: [{ index: 0 }] });
+    fetchMock.mockResolvedValueOnce(new Response(body, { status: 200 }));
+    await expect(embedRemote(['a'], { baseUrl: 'http://x', model: 'm' })).rejects.toMatchObject({
+      name: 'EngineError',
+      message: expect.stringContaining('missing or non-array "embedding" field'),
+    });
+  });
 });

From a68dd67a0d28af7b14b8b6241c6f7494e214e7bf Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 19:57:34 -0600
Subject: [PATCH 08/10] test: cover embedding_meta provider bookkeeping across
 a remote-to-local switch

Verifies that a full local-model rebuild never inherits a stale 'openai'
provider marker from a prior remote-provider build, since buildEmbeddings
always wipes embedding_meta before repopulating it.
---
 .../embedding-provider-metadata.test.ts       | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 tests/search/embedding-provider-metadata.test.ts

diff --git a/tests/search/embedding-provider-metadata.test.ts b/tests/search/embedding-provider-metadata.test.ts
new file mode 100644
index 000000000..6fcc0696c
--- /dev/null
+++ b/tests/search/embedding-provider-metadata.test.ts
@@ -0,0 +1,91 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import Database from 'better-sqlite3';
+import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi } from 'vitest';
+import { initSchema } from '../../src/db/index.js';
+
+// Local pipeline mock — needed because this suite switches back to the local
+// model after a remote run, unlike the other embedding-remote-*.test.ts files
+// which only ever exercise the remote path and mock transformers to throw.
+vi.mock('@huggingface/transformers', () => ({
+  pipeline: async () => async (batch) => {
+    const dim = 4;
+    const data = new Float32Array(dim * batch.length);
+    for (let t = 0; t < batch.length; t++) {
+      data[t * dim] = 0.5;
+    }
+    return { data };
+  },
+  cos_sim: () => 0,
+}));
+
+import { buildEmbeddings } from '../../src/domain/search/index.js';
+
+// Inserts one row into `nodes` and returns the `lastInsertRowid` reported by `run`.
+function insertNode(db, name, kind, file, line, endLine) {
+  const sql = 'INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)';
+  return db.prepare(sql).run(name, kind, file, line, endLine).lastInsertRowid;
+}
+
+// Opens the DB read-only and returns embedding_meta's 'provider' value (undefined if no row).
+function getProviderMeta(dbPath: string): string | undefined {
+  const db = new Database(dbPath, { readonly: true });
+  const select = db.prepare("SELECT value FROM embedding_meta WHERE key = 'provider'");
+  const row = select.get() as { value: string } | undefined;
+  db.close();
+  return row?.value;
+}
+
+describe('embedding_meta provider bookkeeping across provider switches', () => {
+  let tmpDir: string, dbPath: string;
+  const fetchMock = vi.fn();
+
+  beforeAll(() => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-provider-meta-'));
+    fs.writeFileSync(path.join(tmpDir, 'math.js'), 'export function add(a, b) { return a + b; }\n');
+
+    const dbDir = path.join(tmpDir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    dbPath = path.join(dbDir, 'graph.db');
+
+    const db = new Database(dbPath);
+    db.pragma('journal_mode = WAL');
+    initSchema(db);
+    insertNode(db, 'add', 'function', 'math.js', 1, 1);
+    db.close();
+  });
+
+  afterAll(() => {
+    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  beforeEach(() => {
+    vi.stubGlobal('fetch', fetchMock);
+    // A Response body can be read only once, so build a fresh Response on every
+    // fetch call instead of resolving each call with one shared instance.
+    const payload = JSON.stringify({ data: [{ embedding: [0.1, 0.2, 0.3, 0.4], index: 0 }] });
+
+    fetchMock.mockImplementation(async () => new Response(payload, { status: 200 }));
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    fetchMock.mockReset();
+  });
+
+  test('a full rebuild with the local model does not carry over a prior remote provider value', async () => {
+    // `buildEmbeddings` always deletes every embedding_meta row up front
+    // (loadNodesByFile) before persistEmbeddings writes fresh ones, so a
+    // later local-model build can never inherit a stale 'openai' marker from
+    // an earlier remote build — this test locks in that invariant.
+    await buildEmbeddings(tmpDir, 'my-remote-model', dbPath, {
+      remote: { baseUrl: 'http://localhost:9999/v1', model: 'my-remote-model', apiKey: 'sk-x' },
+    });
+    expect(getProviderMeta(dbPath)).toBe('openai');
+
+    await buildEmbeddings(tmpDir, 'minilm', dbPath, {});
+
+    expect(getProviderMeta(dbPath)).not.toBe('openai');
+  });
+});

From 4fcdc9cb043585da78e54f0915604e8984f8af2a Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 20:23:09 -0600
Subject: [PATCH 09/10] fix(cli): avoid printing literal "null" in the models
 banner

codegraph models has no validate() gate like embed does, so a config
with embeddings.provider set but embeddings.model unset would
interpolate the null model straight into the banner text.

Impact: 2 functions changed, 0 affected
---
 src/cli/commands/models.ts        |  3 ++-
 tests/unit/models-command.test.ts | 42 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/models-command.test.ts

diff --git a/src/cli/commands/models.ts b/src/cli/commands/models.ts
index f24fa23ea..2bf2854b8 100644
--- a/src/cli/commands/models.ts
+++ b/src/cli/commands/models.ts
@@ -9,9 +9,10 @@ export const command: CommandDefinition = {
     const defaultModel = (embeddingsConfig?.model as string) || DEFAULT_MODEL;
 
     if (embeddingsConfig?.provider) {
+      const remoteModel = embeddingsConfig.model || '(not configured — set embeddings.model)';
       console.log(
         `\nembeddings.provider is set to "${embeddingsConfig.provider}" — codegraph embed will call ` +
-          `model "${embeddingsConfig.model}" at llm.baseUrl instead of a local model below.`,
+          `model "${remoteModel}" at llm.baseUrl instead of a local model below.`,
       );
     }
     console.log('\nAvailable embedding models:\n');
diff --git a/tests/unit/models-command.test.ts b/tests/unit/models-command.test.ts
new file mode 100644
index 000000000..9f642bbb7
--- /dev/null
+++ b/tests/unit/models-command.test.ts
@@ -0,0 +1,42 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { command } from '../../src/cli/commands/models.js';
+
+// Builds a minimal command context: every embeddings field defaults to null,
+// and the `embeddings` argument is spread on top as overrides.
+function fakeCtx(embeddings: Record<string, unknown>) {
+  const defaults = { model: null, llmProvider: null, provider: null };
+  const config = { embeddings: { ...defaults, ...embeddings } };
+  return { config } as never;
+}
+
+describe('models command', () => {
+  let logSpy: ReturnType<typeof vi.spyOn>;
+
+  beforeEach(() => {
+    logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
+  });
+
+  afterEach(() => {
+    logSpy.mockRestore();
+  });
+
+  it('does not print the literal string "null" when a remote provider has no model configured', () => {
+    command.execute!([], {} as never, fakeCtx({ provider: 'openai' }));
+
+    const banner = logSpy.mock.calls.map((call) => call[0]).find((line) => /openai/.test(line));
+    expect(banner).toBeDefined();
+    expect(banner).not.toMatch(/model "null"/);
+    expect(banner).toMatch(/not configured/);
+  });
+
+  it('prints the configured model name when a remote provider has a model set', () => {
+    command.execute!(
+      [],
+      {} as never,
+      fakeCtx({ provider: 'openai', model: 'text-embedding-3-small' }),
+    );
+
+    const banner = logSpy.mock.calls.map((call) => call[0]).find((line) => /openai/.test(line));
+    expect(banner).toMatch(/model "text-embedding-3-small"/);
+  });
+});

From 4e54788aa5093680a6ff76bcee611c62a75e12b1 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 1 Jul 2026 20:23:16 -0600
Subject: [PATCH 10/10] test: verify requestTimeoutMs is actually threaded into
 remote embed options

fakeCtx's llm defaults omitted requestTimeoutMs, so timeoutMs was
undefined on both sides of the toEqual comparison and the assertion
passed without ever checking the value was threaded through.
---
 tests/unit/embed-command.test.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/embed-command.test.ts b/tests/unit/embed-command.test.ts
index 482b4e995..8c85595aa 100644
--- a/tests/unit/embed-command.test.ts
+++ b/tests/unit/embed-command.test.ts
@@ -90,7 +90,7 @@ describe('embed command execute()', () => {
   it('passes a resolved remote config through to buildEmbeddings when provider is "openai"', async () => {
     const ctx = fakeCtx(
       { provider: 'openai', model: 'text-embedding-3-small' },
-      { baseUrl: 'http://localhost:8080/v1', apiKey: 'sk-test' },
+      { baseUrl: 'http://localhost:8080/v1', apiKey: 'sk-test', requestTimeoutMs: 5000 },
     );
 
     await command.execute!([undefined], { strategy: 'structured' } as never, ctx);
@@ -102,6 +102,7 @@ describe('embed command execute()', () => {
       baseUrl: 'http://localhost:8080/v1',
       model: 'text-embedding-3-small',
       apiKey: 'sk-test',
+      timeoutMs: 5000,
     });
   });