optave · carlos-alm · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/README.md b/README.md
@@ -398,7 +398,7 @@ codegraph cycles --functions   # Function-level cycles
 
 ### Semantic Search
 
-Local embeddings for every function, method, and class — search by natural language. Everything runs locally using [@huggingface/transformers](https://huggingface.co/docs/transformers.js) — no API keys needed.
+Local embeddings for every function, method, and class — search by natural language. Everything runs locally using [@huggingface/transformers](https://huggingface.co/docs/transformers.js) — no API keys needed. Prefer a remote or self-hosted model instead? Set `embeddings.provider: "openai"`, `embeddings.model`, and `llm.baseUrl` in your config to call any OpenAI-compatible `/embeddings` endpoint — see [configuration.md](docs/guides/configuration.md#embeddings-embeddings).
 
 ```bash
 codegraph embed                # Build embeddings (default: nomic)

diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
@@ -173,18 +173,40 @@ Defaults applied to graph queries when the CLI flag is omitted.
 
 ## Embeddings (`embeddings`)
 
-Controls the local embedding model used by `codegraph embed` and `codegraph search`.
+Controls the embedding backend used by `codegraph embed` and `codegraph search`.
 
 | Key | Type | Default | Purpose |
 |-----|------|---------|---------|
-| `model` | `string \| null` | `null` | Model registry key (see `src/domain/search/models.ts`). When `null`, `codegraph embed` reuses the model already stored in the database, or falls back to the built-in default (`"nomic"`) for fresh graphs. Common options: `"nomic"`, `"nomic-v1.5"`, `"bge-large"`. |
+| `model` | `string \| null` | `null` | When `provider` is `null` (local, default): a model registry key (see `src/domain/search/models.ts`). If `model` is also `null`, `codegraph embed` reuses the model already stored in the database, or falls back to the built-in default (`"nomic"`) for fresh graphs. Common options: `"nomic"`, `"nomic-v1.5"`, `"bge-large"`. When `provider` is `"openai"`: the model identifier your endpoint expects (e.g. `"text-embedding-3-small"`, or whatever name your self-hosted server registers) — required in that case (set it here or pass `--model` to `codegraph embed`). |
 | `llmProvider` | `string \| null` | `null` | Optional LLM provider for query expansion. `null` disables it. |
+| `provider` | `string \| null` | `null` | Embedding backend. `null` (default) uses the local bundled model via `@huggingface/transformers`. `"openai"` calls a remote OpenAI-compatible `/embeddings` endpoint configured via `llm.baseUrl` — this covers self-hosted servers (text-embeddings-inference, Ollama, LM Studio, vLLM, etc.), not just OpenAI itself. Any other value is rejected by `codegraph embed` as unsupported. |
+
+### Remote embedding provider
+
+Point `codegraph embed` at a self-hosted or third-party embedding endpoint instead of downloading a local model:
+
+```json
+{
+  "embeddings": {
+    "provider": "openai",
+    "model": "my-embedding-model"
+  },
+  "llm": {
+    "baseUrl": "http://my-tailnet-host:8080/v1",
+    "apiKeyCommand": "op read op://vault/embeddings/api-key"
+  }
+}
+```
+
+The endpoint must accept `POST <baseUrl>/embeddings` with `{ "model": "...", "input": ["text", ...] }` and return `{ "data": [{ "embedding": [...], "index": 0 }, ...] }` — the same shape OpenAI's API uses. `llm.apiKey`/`llm.apiKeyCommand` are optional; omit them for endpoints that don't require auth. Vector dimensionality is read from the response, so there's no model registry to keep in sync.
+
+`codegraph search` (semantic and hybrid modes) and the `semantic_search` MCP tool automatically embed the query through the same remote endpoint when the stored embeddings were built with `embeddings.provider: "openai"` — no extra configuration needed.
 
 ---
 
 ## LLM credentials (`llm`)
 
-Used by features that call out to a chat-completion API (e.g. query expansion). Codegraph never hardcodes a provider — you pick one.
+Used by features that call out to a chat-completion API (e.g. query expansion), and reused by the [remote embedding provider](#embeddings-embeddings) (`baseUrl`, `apiKey`, `apiKeyCommand`) so credentials aren't duplicated across features. Codegraph never hardcodes a provider — you pick one.
 
 | Key | Type | Default | Purpose |
 |-----|------|---------|---------|
@@ -193,6 +215,7 @@ Used by features that call out to a chat-completion API (e.g. query expansion).
 | `baseUrl` | `string \| null` | `null` | Override the provider's base URL (for compatible proxies, local servers, etc.). |
 | `apiKey` | `string \| null` | `null` | Plaintext API key. Prefer `apiKeyCommand` or env vars over this. |
 | `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). 10s timeout, 64 KB max output. |
+| `requestTimeoutMs` | `number` | `120000` | Per-request timeout for remote HTTP calls made against `baseUrl` (currently the [remote embedding provider](#embeddings-embeddings)). Aborts and throws if a self-hosted server hangs mid-request instead of blocking indefinitely. |
 
 Resolution order (first non-empty wins): `apiKeyCommand` output → `CODEGRAPH_LLM_API_KEY` env var → `apiKey` field.
 
@@ -215,6 +238,7 @@ These env vars override the corresponding `llm.*` fields when set:
 - `CODEGRAPH_LLM_PROVIDER` → `llm.provider`
 - `CODEGRAPH_LLM_MODEL` → `llm.model`
 - `CODEGRAPH_LLM_API_KEY` → `llm.apiKey`
+- `CODEGRAPH_LLM_BASE_URL` → `llm.baseUrl`
 
 ---
 

diff --git a/src/cli/commands/embed.ts b/src/cli/commands/embed.ts
@@ -6,6 +6,7 @@ import {
   DEFAULT_MODEL,
   EMBEDDING_STRATEGIES,
   MODELS,
+  resolveRemoteEmbeddingOptions,
 } from '../../domain/search/index.js';
 import { info, warn } from '../../infrastructure/logger.js';
 import type { CommandDefinition } from '../types.js';
@@ -48,15 +49,29 @@ export const command: CommandDefinition = {
     ],
     ['-d, --db <path>', 'Path to graph.db'],
   ],
-  validate([_dir], opts) {
+  validate([_dir], opts, ctx) {
     if (!(EMBEDDING_STRATEGIES as readonly string[]).includes(opts.strategy)) {
       return `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`;
     }
+    const provider = ctx.config.embeddings?.provider ?? null;
+    if (provider && provider !== 'openai') {
+      return (
+        `Unsupported embeddings.provider "${provider}". Currently supported: "openai" ` +
+        '(any OpenAI-compatible /embeddings endpoint, including self-hosted servers).'
+      );
+    }
+    if (provider && !opts.model && !ctx.config.embeddings?.model) {
+      return (
+        `embeddings.provider is set to "${provider}" but no model is configured. ` +
+        'Set embeddings.model to the model identifier your endpoint expects, or pass --model.'
+      );
+    }
   },
   async execute([dir], opts, ctx) {
     const root = path.resolve(dir || '.');
     const dbPath = opts.db as string | undefined;
     const embeddingsConfig = ctx.config.embeddings;
+    const provider = embeddingsConfig?.provider ?? null;
     const flagModel = opts.model as string | undefined;
     const configModel = (embeddingsConfig?.model as string | null | undefined) ?? null;
 
@@ -65,6 +80,10 @@ export const command: CommandDefinition = {
       model = flagModel;
     } else if (configModel) {
       model = configModel;
+    } else if (provider) {
+      // Unreachable in practice — validate() rejects a provider with no model
+      // before execute() runs — but keeps this branch type-safe.
+      model = DEFAULT_MODEL;
     } else {
       const sticky = resolveStickyModel(dbPath);
       if (sticky) {
@@ -77,6 +96,8 @@ export const command: CommandDefinition = {
       }
     }
 
-    await buildEmbeddings(root, model, dbPath, { strategy: opts.strategy });
+    const remote =
+      provider === 'openai' ? resolveRemoteEmbeddingOptions(ctx.config, model) : undefined;
+    await buildEmbeddings(root, model, dbPath, { strategy: opts.strategy, remote });
   },
 };
diff --git a/src/cli/commands/models.ts b/src/cli/commands/models.ts
@@ -7,6 +7,14 @@ export const command: CommandDefinition = {
   execute(_args, _opts, ctx) {
     const embeddingsConfig = ctx.config.embeddings;
     const defaultModel = (embeddingsConfig?.model as string) || DEFAULT_MODEL;
+
+    if (embeddingsConfig?.provider) {
+      const remoteModel = embeddingsConfig.model || '(not configured — set embeddings.model)';
+      console.log(
+        `\nembeddings.provider is set to "${embeddingsConfig.provider}" — codegraph embed will call ` +
+          `model "${remoteModel}" at llm.baseUrl instead of a local model below.`,
+      );
+    }
     console.log('\nAvailable embedding models:\n');
 
     interface ModelEntry {

diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts
@@ -5,6 +5,11 @@ import { warn } from '../../infrastructure/logger.js';
 import { DbError } from '../../shared/errors.js';
 import type { BetterSqlite3Database, NodeRow } from '../../types.js';
 import { embed, getModelConfig } from './models.js';
+import {
+  DEFAULT_REMOTE_CONTEXT_WINDOW,
+  embedRemote,
+  type RemoteEmbeddingOptions,
+} from './providers/remote.js';
 import { buildSourceText } from './strategies/source.js';
 import { buildStructuredText } from './strategies/structured.js';
 
@@ -167,6 +172,7 @@ function persistEmbeddings(
   dim: number,
   modelName: string,
   strategy: EmbeddingStrategy,
+  provider: string | null,
 ): void {
   const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
   const insert = db.prepare(
@@ -189,12 +195,25 @@ function persistEmbeddings(
     if (overflowCount > 0) {
       insertMeta.run('truncated_count', String(overflowCount));
     }
+    // Record which backend produced these vectors so search-time routing
+    // (`embedQuery` in `search/semantic.ts`) can key off embed-time truth
+    // instead of the live config, which may have drifted since `embed` ran.
+    if (provider) {
+      insertMeta.run('provider', provider);
+    }
   });
   insertAll();
 }
 
 export interface BuildEmbeddingsOptions {
   strategy?: EmbeddingStrategy;
+  /**
+   * When set, embeddings are generated via a remote OpenAI-compatible
+   * endpoint instead of the local bundled model. `modelKey` is then treated
+   * as an opaque model identifier passed straight to the endpoint, not a
+   * local registry key.
+   */
+  remote?: RemoteEmbeddingOptions;
 }
 
 /**
@@ -225,12 +244,21 @@ export async function buildEmbeddings(
   const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
   console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
 
-  const config = getModelConfig(modelKey);
-  const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
+  let contextWindow: number;
+  let displayName: string;
+  if (options.remote) {
+    contextWindow = DEFAULT_REMOTE_CONTEXT_WINDOW;
+    displayName = options.remote.model;
+  } else {
+    const modelConfig = getModelConfig(modelKey);
+    contextWindow = modelConfig.contextWindow;
+    displayName = modelConfig.name;
+  }
+  const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, contextWindow);
 
   if (prepared.overflowCount > 0) {
     warn(
-      `${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`,
+      `${prepared.overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
     );
   }
 
@@ -247,13 +275,22 @@ export async function buildEmbeddings(
     );
   }
 
-  console.log(`Embedding ${prepared.texts.length} symbols...`);
-  const { vectors, dim } = await embed(prepared.texts, modelKey);
+  console.log(
+    `Embedding ${prepared.texts.length} symbols${options.remote ? ` via remote provider (${displayName})` : ''}...`,
+  );
+  const { vectors, dim } = options.remote
+    ? await embedRemote(prepared.texts, options.remote)
+    : await embed(prepared.texts, modelKey);
 
-  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, config.name, strategy);
+  // Only "openai" (OpenAI-compatible /embeddings) is currently supported as a
+  // remote provider — `options.remote` being set implies it. Recorded so
+  // search-time routing doesn't have to trust the live config (see
+  // `embedQuery` in `search/semantic.ts`).
+  const provider = options.remote ? 'openai' : null;
+  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, displayName, strategy, provider);
 
   console.log(
-    `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
+    `\nStored ${vectors.length} embeddings (${dim}d, ${displayName}, strategy: ${strategy}) in graph.db`,
   );
   closeDb(db);
 }
diff --git a/src/domain/search/index.ts b/src/domain/search/index.ts
@@ -8,6 +8,8 @@ export type { BuildEmbeddingsOptions } from './generator.js';
 export { buildEmbeddings, estimateTokens } from './generator.js';
 export type { ModelConfig } from './models.js';
 export { DEFAULT_MODEL, disposeModel, EMBEDDING_STRATEGIES, embed, MODELS } from './models.js';
+export type { RemoteEmbeddingOptions } from './providers/remote.js';
+export { embedRemote, resolveRemoteEmbeddingOptions } from './providers/remote.js';
 export { search } from './search/cli-formatter.js';
 export { hybridSearchData } from './search/hybrid.js';
 export { ftsSearchData } from './search/keyword.js';