diff --git a/src/common/config.ts b/src/common/config.ts index e7ece022..6e404b86 100644 --- a/src/common/config.ts +++ b/src/common/config.ts @@ -319,7 +319,7 @@ export function warnAboutDeprecatedOrUnknownCliArgs( if (knownArgs.connectionString) { usedDeprecatedArgument = true; warn( - "The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string." + "Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string." ); } @@ -333,15 +333,15 @@ export function warnAboutDeprecatedOrUnknownCliArgs( if (!valid) { usedInvalidArgument = true; if (suggestion) { - warn(`Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`); + warn(`Warning: Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`); } else { - warn(`Invalid command line argument '${providedKey}'.`); + warn(`Warning: Invalid command line argument '${providedKey}'.`); } } } if (usedInvalidArgument || usedDeprecatedArgument) { - warn("Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."); + warn("- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."); } if (usedInvalidArgument) { @@ -372,6 +372,24 @@ export function registerKnownSecretsInRootKeychain(userConfig: Partial void): void { + const vectorSearchEnabled = config.previewFeatures.includes("vectorSearch"); + const embeddingsProviderConfigured = !!config.voyageApiKey; + if (vectorSearchEnabled && !embeddingsProviderConfigured) { + warn(`\ +Warning: Vector search is enabled but no embeddings provider is configured. +- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\ +`); + } + + if (!vectorSearchEnabled && embeddingsProviderConfigured) { + warn(`\ +Warning: An embeddings provider is configured but the 'vectorSearch' preview feature is not enabled. +- Enable vector search by adding 'vectorSearch' to the 'previewFeatures' configuration option, or remove the embeddings provider configuration if not needed.\ +`); + } +} + export function setupUserConfig({ cli, env }: { cli: string[]; env: Record }): UserConfig { const rawConfig = { ...parseEnvConfig(env), @@ -392,6 +410,7 @@ export function setupUserConfig({ cli, env }: { cli: string[]; env: Record console.warn(message)); registerKnownSecretsInRootKeychain(userConfig); return userConfig; } diff --git a/src/common/search/vectorSearchEmbeddingsManager.ts b/src/common/search/vectorSearchEmbeddingsManager.ts index dfcd4e28..7384f1b0 100644 --- a/src/common/search/vectorSearchEmbeddingsManager.ts +++ b/src/common/search/vectorSearchEmbeddingsManager.ts @@ -24,10 +24,8 @@ export type VectorFieldIndexDefinition = { export type VectorFieldValidationError = { path: string; expectedNumDimensions: number; - expectedQuantization: Quantization; actualNumDimensions: number | "unknown"; - actualQuantization: Quantization | "unknown"; - error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric"; + error: "dimension-mismatch" | "not-a-vector" | "not-numeric"; }; export type EmbeddingNamespace = `${string}.${string}`; @@ -116,9 +114,9 @@ export class VectorSearchEmbeddingsManager { if (embeddingValidationResults.length > 0) { const embeddingValidationMessages = embeddingValidationResults.map( (validation) => - `- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` + - ` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` + - `actual quantization: ${validation.actualQuantization}. Error: ${validation.error}` + `- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions,` + + ` and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions},` + + ` Error: ${validation.error}` ); throw new MongoDBError( @@ -179,16 +177,36 @@ export class VectorSearchEmbeddingsManager { let fieldRef: unknown = document; const constructError = ( - details: Partial> + details: Partial> ): VectorFieldValidationError => ({ path: definition.path, expectedNumDimensions: definition.numDimensions, - expectedQuantization: definition.quantization, actualNumDimensions: details.actualNumDimensions ?? "unknown", - actualQuantization: details.actualQuantization ?? "unknown", error: details.error ?? "not-a-vector", }); + const extractUnderlyingVector = (fieldRef: unknown): ArrayLike | undefined => { + if (fieldRef instanceof BSON.Binary) { + try { + return fieldRef.toFloat32Array(); + } catch { + // nothing to do here + } + + try { + return fieldRef.toBits(); + } catch { + // nothing to do here + } + } + + if (Array.isArray(fieldRef)) { + return fieldRef as Array; + } + + return undefined; + }; + for (const field of fieldPath) { if (fieldRef && typeof fieldRef === "object" && field in fieldRef) { fieldRef = (fieldRef as Record)[field]; @@ -197,70 +215,25 @@ export class VectorSearchEmbeddingsManager { } } - switch (definition.quantization) { - // Because quantization is not defined by the user - // we have to trust them in the format they use. - case "none": - return undefined; - case "scalar": - case "binary": - if (fieldRef instanceof BSON.Binary) { - try { - const elements = fieldRef.toFloat32Array(); - if (elements.length !== definition.numDimensions) { - return constructError({ - actualNumDimensions: elements.length, - actualQuantization: "binary", - error: "dimension-mismatch", - }); - } - - return undefined; - } catch { - // bits are also supported - try { - const bits = fieldRef.toBits(); - if (bits.length !== definition.numDimensions) { - return constructError({ - actualNumDimensions: bits.length, - actualQuantization: "binary", - error: "dimension-mismatch", - }); - } - - return undefined; - } catch { - return constructError({ - actualQuantization: "binary", - error: "not-a-vector", - }); - } - } - } else { - if (!Array.isArray(fieldRef)) { - return constructError({ - error: "not-a-vector", - }); - } - - if (fieldRef.length !== definition.numDimensions) { - return constructError({ - actualNumDimensions: fieldRef.length, - actualQuantization: "scalar", - error: "dimension-mismatch", - }); - } - - if (!fieldRef.every((e) => this.isANumber(e))) { - return constructError({ - actualNumDimensions: fieldRef.length, - actualQuantization: "scalar", - error: "not-numeric", - }); - } - } + const maybeVector = extractUnderlyingVector(fieldRef); + if (!maybeVector) { + return constructError({ + error: "not-a-vector", + }); + } - break; + if (maybeVector.length !== definition.numDimensions) { + return constructError({ + actualNumDimensions: maybeVector.length, + error: "dimension-mismatch", + }); + } + + if (Array.isArray(maybeVector) && maybeVector.some((e) => !this.isANumber(e))) { + return constructError({ + actualNumDimensions: maybeVector.length, + error: "not-numeric", + }); } return undefined; diff --git a/src/tools/mongodb/create/createIndex.ts b/src/tools/mongodb/create/createIndex.ts index 68ad4d91..fcbc12ee 100644 --- a/src/tools/mongodb/create/createIndex.ts +++ b/src/tools/mongodb/create/createIndex.ts @@ -80,7 +80,7 @@ export class CreateIndexTool extends MongoDBToolBase { ]) ) .describe( - "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes" + `The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("vectorSearch") ? " and 'vectorSearch' for vector search indexes" : ""}.` ), }; diff --git a/src/tools/mongodb/metadata/explain.ts b/src/tools/mongodb/metadata/explain.ts index d1f7c686..a98d4f6e 100644 --- a/src/tools/mongodb/metadata/explain.ts +++ b/src/tools/mongodb/metadata/explain.ts @@ -4,7 +4,7 @@ import type { ToolArgs, OperationType } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; import { z } from "zod"; import type { Document } from "mongodb"; -import { AggregateArgs } from "../read/aggregate.js"; +import { getAggregateArgs } from "../read/aggregate.js"; import { FindArgs } from "../read/find.js"; import { CountArgs } from "../read/count.js"; @@ -20,7 +20,7 @@ export class ExplainTool extends MongoDBToolBase { z.discriminatedUnion("name", [ z.object({ name: z.literal("aggregate"), - arguments: z.object(AggregateArgs), + arguments: z.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))), }), z.object({ name: z.literal("find"), diff --git a/src/tools/mongodb/mongodbSchemas.ts b/src/tools/mongodb/mongodbSchemas.ts index cfae16b2..392273ba 100644 --- a/src/tools/mongodb/mongodbSchemas.ts +++ b/src/tools/mongodb/mongodbSchemas.ts @@ -42,7 +42,7 @@ export type EmbeddingParameters = { export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels }); export type SupportedEmbeddingParameters = z.infer; -export const AnyVectorSearchStage = zEJSON(); +export const AnyAggregateStage = zEJSON(); export const VectorSearchStage = z.object({ $vectorSearch: z .object({ diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts index d6a624cf..4ef641b4 100644 --- a/src/tools/mongodb/read/aggregate.ts +++ b/src/tools/mongodb/read/aggregate.ts @@ -12,18 +12,17 @@ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorU import { operationWithFallback } from "../../../helpers/operationWithFallback.js"; import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js"; import { LogId } from "../../../common/logger.js"; -import { AnyVectorSearchStage, VectorSearchStage } from "../mongodbSchemas.js"; +import { AnyAggregateStage, VectorSearchStage } from "../mongodbSchemas.js"; import { assertVectorSearchFilterFieldsAreIndexed, type VectorSearchIndex, } from "../../../helpers/assertVectorSearchFilterFieldsAreIndexed.js"; -export const AggregateArgs = { - pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe( - `An array of aggregation stages to execute. +const pipelineDescriptionWithVectorSearch = `\ +An array of aggregation stages to execute. \`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline. ### Usage Rules for \`$vectorSearch\` -- **Unset embeddings:** +- **Unset embeddings:** Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**. - **Pre-filtering:** If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index. @@ -32,20 +31,28 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi For all remaining filters, add a $match stage after $vectorSearch. ### Note to LLM - If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields. -- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.` - ), - responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ +- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\ +`; + +const genericPipelineDescription = "An array of aggregation stages to execute."; + +export const getAggregateArgs = (vectorSearchEnabled: boolean) => + ({ + pipeline: z + .array(vectorSearchEnabled ? z.union([AnyAggregateStage, VectorSearchStage]) : AnyAggregateStage) + .describe(vectorSearchEnabled ? pipelineDescriptionWithVectorSearch : genericPipelineDescription), + responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \ Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\ `), -}; + }) as const; export class AggregateTool extends MongoDBToolBase { public name = "aggregate"; protected description = "Run an aggregation against a MongoDB collection"; protected argsShape = { ...DbOperationArgs, - ...AggregateArgs, + ...getAggregateArgs(this.isFeatureEnabled("vectorSearch")), }; public operationType: OperationType = "read"; diff --git a/src/tools/mongodb/read/export.ts b/src/tools/mongodb/read/export.ts index e2ac194b..a12ed9fb 100644 --- a/src/tools/mongodb/read/export.ts +++ b/src/tools/mongodb/read/export.ts @@ -6,7 +6,7 @@ import type { OperationType, ToolArgs } from "../../tool.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import { FindArgs } from "./find.js"; import { jsonExportFormat } from "../../../common/exportsManager.js"; -import { AggregateArgs } from "./aggregate.js"; +import { getAggregateArgs } from "./aggregate.js"; export class ExportTool extends MongoDBToolBase { public name = "export"; @@ -32,7 +32,9 @@ export class ExportTool extends MongoDBToolBase { name: z .literal("aggregate") .describe("The literal name 'aggregate' to represent an aggregation cursor as target."), - arguments: z.object(AggregateArgs).describe("The arguments for 'aggregate' operation."), + arguments: z + .object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))) + .describe("The arguments for 'aggregate' operation."), }), ]) ) diff --git a/tests/integration/tools/mongodb/create/createIndex.test.ts b/tests/integration/tools/mongodb/create/createIndex.test.ts index f76bb5ba..a4c4a7be 100644 --- a/tests/integration/tools/mongodb/create/createIndex.test.ts +++ b/tests/integration/tools/mongodb/create/createIndex.test.ts @@ -13,6 +13,22 @@ import { ObjectId, type Collection, type Document, type IndexDirection } from "m import { afterEach, beforeEach, describe, expect, it } from "vitest"; describeWithMongoDB("createIndex tool when search is not enabled", (integration) => { + validateToolMetadata(integration, "create-index", "Create an index for a collection", [ + ...databaseCollectionParameters, + { + name: "definition", + type: "array", + description: "The index definition. Use 'classic' for standard indexes.", + required: true, + }, + { + name: "name", + type: "string", + description: "The name of the index", + required: false, + }, + ]); + it("doesn't allow creating vector search indexes", async () => { expect(integration.mcpServer().userConfig.previewFeatures).to.not.include("vectorSearch"); @@ -99,7 +115,7 @@ describeWithMongoDB( name: "definition", type: "array", description: - "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes", + "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes.", required: true, }, { diff --git a/tests/integration/tools/mongodb/create/insertMany.test.ts b/tests/integration/tools/mongodb/create/insertMany.test.ts index e9964e26..fdee8163 100644 --- a/tests/integration/tools/mongodb/create/insertMany.test.ts +++ b/tests/integration/tools/mongodb/create/insertMany.test.ts @@ -124,6 +124,24 @@ describeWithMongoDB( await collection.drop(); }); + validateToolMetadata(integration, "insert-many", "Insert an array of documents into a MongoDB collection", [ + ...databaseCollectionParameters, + { + name: "documents", + type: "array", + description: + "The array of documents to insert, matching the syntax of the document argument of db.collection.insertMany().", + required: true, + }, + { + name: "embeddingParameters", + type: "object", + description: + "The embedding model and its parameters to use to generate embeddings for fields with vector search indexes. Note to LLM: If unsure which embedding model to use, ask the user before providing one.", + required: false, + }, + ]); + it("inserts a document when the embedding is correct", async () => { await createVectorSearchIndexAndWait(integration.mongoClient(), database, "test", [ { @@ -152,12 +170,12 @@ describeWithMongoDB( expect(docCount).toBe(1); }); - it("returns an error when there is a search index and quantisation is wrong", async () => { + it("returns an error when there is a search index and embeddings parameter are wrong", async () => { await createVectorSearchIndexAndWait(integration.mongoClient(), database, "test", [ { type: "vector", path: "embedding", - numDimensions: 8, + numDimensions: 256, similarity: "euclidean", quantization: "scalar", }, @@ -169,6 +187,18 @@ describeWithMongoDB( database: database, collection: "test", documents: [{ embedding: "oopsie" }], + // Note: We are intentionally commenting out the + // embeddingParameters so that we can simulate the idea + // of unknown or mismatched quantization. + + // embeddingParameters: { outputDimension: 256, + // outputDtype: "float", model: "voyage-3-large", input: + // [ + // { + // embedding: "oopsie", + // }, + // ], + // }, }, }); @@ -176,7 +206,7 @@ describeWithMongoDB( expect(content).toContain("Error running insert-many"); const untrustedContent = getDataFromUntrustedContent(content); expect(untrustedContent).toContain( - "- Field embedding is an embedding with 8 dimensions and scalar quantization, and the provided value is not compatible. Actual dimensions: unknown, actual quantization: unknown. Error: not-a-vector" + "- Field embedding is an embedding with 256 dimensions, and the provided value is not compatible. Actual dimensions: unknown, Error: not-a-vector" ); const oopsieCount = await collection.countDocuments({ @@ -590,6 +620,8 @@ describeWithMongoDB( { getUserConfig: () => ({ ...defaultTestConfig, + // This is expected to be set through the CI env. When not set we + // get a warning in the run logs. voyageApiKey: process.env.TEST_MDB_MCP_VOYAGE_API_KEY ?? "", previewFeatures: ["vectorSearch"], }), @@ -621,7 +653,9 @@ describeWithMongoDB( { getUserConfig: () => ({ ...defaultTestConfig, - voyageApiKey: "valid-key", + // This is expected to be set through the CI env. When not set we + // get a warning in the run logs. + voyageApiKey: process.env.TEST_MDB_MCP_VOYAGE_API_KEY ?? "", previewFeatures: ["vectorSearch"], }), } diff --git a/tests/integration/tools/mongodb/mongodbTool.test.ts b/tests/integration/tools/mongodb/mongodbTool.test.ts index ca3bc423..de538292 100644 --- a/tests/integration/tools/mongodb/mongodbTool.test.ts +++ b/tests/integration/tools/mongodb/mongodbTool.test.ts @@ -71,11 +71,9 @@ class UnusableVoyageTool extends MongoDBToolBase { protected argsShape = {}; override verifyAllowed(): boolean { - if (this.config.voyageApiKey.trim()) { - return super.verifyAllowed(); - } return false; } + public async execute(): Promise { await this.ensureConnected(); return { content: [{ type: "text", text: "Something" }] }; diff --git a/tests/integration/tools/mongodb/read/aggregate.test.ts b/tests/integration/tools/mongodb/read/aggregate.test.ts index 11b29e4e..a2317ad7 100644 --- a/tests/integration/tools/mongodb/read/aggregate.test.ts +++ b/tests/integration/tools/mongodb/read/aggregate.test.ts @@ -27,19 +27,7 @@ describeWithMongoDB("aggregate tool", (integration) => { ...databaseCollectionParameters, { name: "pipeline", - description: `An array of aggregation stages to execute. -\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline. -### Usage Rules for \`$vectorSearch\` -- **Unset embeddings:** - Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**. -- **Pre-filtering:** -If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index. - NEVER include fields in $vectorSearch.filter that are not part of the vector index. -- **Post-filtering:** - For all remaining filters, add a $match stage after $vectorSearch. -### Note to LLM -- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields. -- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.`, + description: "An array of aggregation stages to execute.", type: "array", required: true, }, @@ -406,6 +394,34 @@ describeWithMongoDB( await integration.mongoClient().db(integration.randomDbName()).collection("databases").drop(); }); + validateToolMetadata(integration, "aggregate", "Run an aggregation against a MongoDB collection", [ + ...databaseCollectionParameters, + { + name: "pipeline", + description: `An array of aggregation stages to execute. +\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline. +### Usage Rules for \`$vectorSearch\` +- **Unset embeddings:** + Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**. +- **Pre-filtering:** +If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index. + NEVER include fields in $vectorSearch.filter that are not part of the vector index. +- **Post-filtering:** + For all remaining filters, add a $match stage after $vectorSearch. +### Note to LLM +- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields. +- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.`, + type: "array", + required: true, + }, + { + name: "responseBytesLimit", + description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.`, + type: "number", + required: false, + }, + ]); + it("should throw an exception when using an index that does not exist", async () => { await waitUntilSearchIsReady(integration.mongoClient()); diff --git a/tests/unit/common/config.test.ts b/tests/unit/common/config.test.ts index 5c671ca7..ebd4d0bb 100644 --- a/tests/unit/common/config.test.ts +++ b/tests/unit/common/config.test.ts @@ -5,11 +5,13 @@ import { registerKnownSecretsInRootKeychain, warnAboutDeprecatedOrUnknownCliArgs, UserConfigSchema, + warnIfVectorSearchNotEnabledCorrectly, } from "../../../src/common/config.js"; import { getLogPath, getExportsPath } from "../../../src/common/configUtils.js"; import type { CliOptions } from "@mongosh/arg-parser"; import { Keychain } from "../../../src/common/keychain.js"; import type { Secret } from "../../../src/common/keychain.js"; +import { defaultTestConfig } from "../../integration/helpers.js"; describe("config", () => { it("should generate defaults from UserConfigSchema that match expected values", () => { @@ -686,14 +688,14 @@ describe("config", () => { describe("CLI arguments", () => { const referDocMessage = - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."; + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."; type TestCase = { readonly cliArg: keyof (CliOptions & UserConfig); readonly warning: string }; const testCases = [ { cliArg: "connectionString", warning: - "The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.", + "Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.", }, ] as TestCase[]; @@ -742,9 +744,9 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'wakanda'."); + expect(warn).toHaveBeenCalledWith("Warning: Invalid command line argument 'wakanda'."); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." ); }); @@ -767,9 +769,11 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonli'. Did you mean 'readOnly'?"); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "Warning: Invalid command line argument 'readonli'. Did you mean 'readOnly'?" + ); + expect(warn).toHaveBeenCalledWith( + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." ); }); @@ -781,10 +785,57 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonly'. Did you mean 'readOnly'?"); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "Warning: Invalid command line argument 'readonly'. Did you mean 'readOnly'?" + ); + expect(warn).toHaveBeenCalledWith( + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + ); + }); + }); + + describe("warnIfVectorSearchNotEnabledCorrectly", () => { + it("should warn if vectorSearch is enabled but embeddings provider is not configured", () => { + const warnStub = vi.fn(); + warnIfVectorSearchNotEnabledCorrectly( + { + ...defaultTestConfig, + previewFeatures: ["vectorSearch"], + }, + warnStub + ); + expect(warnStub).toBeCalledWith(`\ +Warning: Vector search is enabled but no embeddings provider is configured. +- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\ +`); + }); + + it("should warn if vectorSearch is not enabled but embeddings provider is configured", () => { + const warnStub = vi.fn(); + warnIfVectorSearchNotEnabledCorrectly( + { + ...defaultTestConfig, + voyageApiKey: "random-key", + }, + warnStub + ); + expect(warnStub).toBeCalledWith(`\ +Warning: An embeddings provider is configured but the 'vectorSearch' preview feature is not enabled. +- Enable vector search by adding 'vectorSearch' to the 'previewFeatures' configuration option, or remove the embeddings provider configuration if not needed.\ +`); + }); + + it("should not warn if vectorSearch is enabled correctly", () => { + const warnStub = vi.fn(); + warnIfVectorSearchNotEnabledCorrectly( + { + ...defaultTestConfig, + voyageApiKey: "random-key", + previewFeatures: ["vectorSearch"], + }, + warnStub ); + expect(warnStub).not.toBeCalled(); }); }); diff --git a/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts b/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts index 2bf05146..491b6fde 100644 --- a/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts +++ b/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts @@ -42,6 +42,13 @@ const embeddingConfig: Map = n [ mapKey, [ + { + type: "vector", + path: "embedding_field_wo_quantization", + numDimensions: 8, + quantization: "none", + similarity: "euclidean", + }, { type: "vector", path: "embedding_field", @@ -278,51 +285,56 @@ describe("VectorSearchEmbeddingsManager", () => { expect(result).toHaveLength(0); }); - it("documents inserting the field with wrong type are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: "some text" } - ); - - expect(result).toHaveLength(1); - }); - - it("documents inserting the field with wrong dimensions are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: [1, 2, 3] } - ); + it.each(["embedding_field", "embedding_field_wo_quantization"] as const)( + "documents inserting the field with wrong type are invalid - $0", + async (field) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [field]: "some text" } + ); - expect(result).toHaveLength(1); - const expectedError: VectorFieldValidationError = { - actualNumDimensions: 3, - actualQuantization: "scalar", - error: "dimension-mismatch", - expectedNumDimensions: 8, - expectedQuantization: "scalar", - path: "embedding_field", - }; - expect(result[0]).toEqual(expectedError); - }); + expect(result).toHaveLength(1); + } + ); - it("documents inserting the field with correct dimensions, but wrong type are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: ["1", "2", "3", "4", "5", "6", "7", "8"] } - ); + it.each(["embedding_field", "embedding_field_wo_quantization"] as const)( + "documents inserting the field with wrong dimensions are invalid - path = $0", + async (path) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [path]: [1, 2, 3] } + ); + + expect(result).toHaveLength(1); + const expectedError: VectorFieldValidationError = { + actualNumDimensions: 3, + error: "dimension-mismatch", + expectedNumDimensions: 8, + path, + }; + expect(result[0]).toEqual(expectedError); + } + ); - expect(result).toHaveLength(1); - const expectedError: VectorFieldValidationError = { - actualNumDimensions: 8, - actualQuantization: "scalar", - error: "not-numeric", - expectedNumDimensions: 8, - expectedQuantization: "scalar", - path: "embedding_field", - }; - - expect(result[0]).toEqual(expectedError); - }); + it.each(["embedding_field", "embedding_field_wo_quantization"] as const)( + "documents inserting the field with correct dimensions, but wrong type are invalid - $0", + async (path) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [path]: ["1", "2", "3", "4", "5", "6", "7", "8"] } + ); + + expect(result).toHaveLength(1); + const expectedError: VectorFieldValidationError = { + actualNumDimensions: 8, + error: "not-numeric", + expectedNumDimensions: 8, + path, + }; + + expect(result[0]).toEqual(expectedError); + } + ); it("documents inserting the field with correct dimensions and quantization in binary are valid", async () => { const result = await embeddings.findFieldsWithWrongEmbeddings( @@ -458,14 +470,6 @@ describe("VectorSearchEmbeddingsManager", () => { ).rejects.toThrow(/Actual dimensions: 3/); }); - it("throws error with details about quantization", async () => { - await expect( - embeddings.assertFieldsHaveCorrectEmbeddings({ database, collection }, [ - { embedding_field: [1, 2, 3] }, - ]) - ).rejects.toThrow(/actual quantization: scalar/); - }); - it("throws error with details about error type", async () => { await expect( embeddings.assertFieldsHaveCorrectEmbeddings({ database, collection }, [