Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions src/common/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
if (knownArgs.connectionString) {
usedDeprecatedArgument = true;
warn(
"The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
"Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
);
}

Expand All @@ -333,15 +333,15 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
if (!valid) {
usedInvalidArgument = true;
if (suggestion) {
warn(`Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
warn(`Warning: Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
} else {
warn(`Invalid command line argument '${providedKey}'.`);
warn(`Warning: Invalid command line argument '${providedKey}'.`);
}
}
}

if (usedInvalidArgument || usedDeprecatedArgument) {
warn("Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
warn("- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
}

if (usedInvalidArgument) {
Expand Down Expand Up @@ -372,6 +372,24 @@ export function registerKnownSecretsInRootKeychain(userConfig: Partial<UserConfi
maybeRegister(userConfig.username, "user");
}

function warnIfVectorSearchNotEnabledCorrectly(config: UserConfig): void {
const vectorSearchEnabled = config.previewFeatures.includes("vectorSearch");
const embeddingsProviderConfigured = !!config.voyageApiKey;
if (vectorSearchEnabled && !embeddingsProviderConfigured) {
console.warn(`\
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You use warn in other places but console.warn here, is it intentional?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh - that's because the place where we use warn is where the console.warn is being provided through the function arguments. Likely for some tests. I had the tests covered differently so it should be fine.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually no, the new warning is not covered. I will write a test quickly.

Warning: Vector search is enabled but no embeddings provider is configured.
- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\
`);
}

if (!vectorSearchEnabled && embeddingsProviderConfigured) {
console.warn(`\
Warning: An embeddings provider is configured but the 'vectorSearch' preview feature is not enabled.
- Enable vector search by adding 'vectorSearch' to the 'previewFeatures' configuration option, or remove the embeddings provider configuration if not needed.\
`);
}
}

export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<string, unknown> }): UserConfig {
const rawConfig = {
...parseEnvConfig(env),
Expand All @@ -392,6 +410,7 @@ export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<strin
// We don't have a schema defined for all args-parser arguments, so we need to merge the raw config with the parsed config.
const userConfig = { ...rawConfig, ...parseResult.data } as UserConfig;

warnIfVectorSearchNotEnabledCorrectly(userConfig);
registerKnownSecretsInRootKeychain(userConfig);
return userConfig;
}
Expand Down
30 changes: 27 additions & 3 deletions src/common/search/vectorSearchEmbeddingsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,33 @@ export class VectorSearchEmbeddingsManager {
}

switch (definition.quantization) {
// Because quantization is not defined by the user
// we have to trust them in the format they use.
// Quantization "none" means no quantization is performed and
// full-fidelity vectors are stored. The underlying vector must
// therefore be an array of numbers whose length matches the number
// of dimensions defined on the index.
case "none":
if (!Array.isArray(fieldRef)) {
return constructError({
error: "not-a-vector",
});
}

if (fieldRef.length !== definition.numDimensions) {
return constructError({
actualNumDimensions: fieldRef.length,
actualQuantization: "none",
error: "dimension-mismatch",
});
}

if (fieldRef.some((e) => !this.isANumber(e))) {
return constructError({
actualNumDimensions: fieldRef.length,
actualQuantization: "none",
error: "not-numeric",
});
}

return undefined;
case "scalar":
case "binary":
Expand Down Expand Up @@ -251,7 +275,7 @@ export class VectorSearchEmbeddingsManager {
});
}

if (!fieldRef.every((e) => this.isANumber(e))) {
if (fieldRef.some((e) => !this.isANumber(e))) {
return constructError({
actualNumDimensions: fieldRef.length,
actualQuantization: "scalar",
Expand Down
2 changes: 1 addition & 1 deletion src/tools/mongodb/create/createIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ export class CreateIndexTool extends MongoDBToolBase {
])
)
.describe(
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes"
`The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("vectorSearch") ? " and 'vectorSearch' for vector search indexes" : ""}.`
),
};

Expand Down
4 changes: 2 additions & 2 deletions src/tools/mongodb/metadata/explain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { ToolArgs, OperationType } from "../../tool.js";
import { formatUntrustedData } from "../../tool.js";
import { z } from "zod";
import type { Document } from "mongodb";
import { AggregateArgs } from "../read/aggregate.js";
import { getAggregateArgs } from "../read/aggregate.js";
import { FindArgs } from "../read/find.js";
import { CountArgs } from "../read/count.js";

Expand All @@ -20,7 +20,7 @@ export class ExplainTool extends MongoDBToolBase {
z.discriminatedUnion("name", [
z.object({
name: z.literal("aggregate"),
arguments: z.object(AggregateArgs),
arguments: z.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))),
}),
z.object({
name: z.literal("find"),
Expand Down
2 changes: 1 addition & 1 deletion src/tools/mongodb/mongodbSchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export type EmbeddingParameters = {
export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels });
export type SupportedEmbeddingParameters = z.infer<typeof zSupportedEmbeddingParameters>;

export const AnyVectorSearchStage = zEJSON();
export const AnyAggregateStage = zEJSON();
export const VectorSearchStage = z.object({
$vectorSearch: z
.object({
Expand Down
25 changes: 15 additions & 10 deletions src/tools/mongodb/read/aggregate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,17 @@ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorU
import { operationWithFallback } from "../../../helpers/operationWithFallback.js";
import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js";
import { LogId } from "../../../common/logger.js";
import { AnyVectorSearchStage, VectorSearchStage } from "../mongodbSchemas.js";
import { AnyAggregateStage, VectorSearchStage } from "../mongodbSchemas.js";
import {
assertVectorSearchFilterFieldsAreIndexed,
type VectorSearchIndex,
} from "../../../helpers/assertVectorSearchFilterFieldsAreIndexed.js";

export const AggregateArgs = {
pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe(
`An array of aggregation stages to execute.
const pipelineDescription = `\
An array of aggregation stages to execute.
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
### Usage Rules for \`$vectorSearch\`
- **Unset embeddings:**
- **Unset embeddings:**
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
- **Pre-filtering:**
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
Expand All @@ -32,20 +31,26 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi
For all remaining filters, add a $match stage after $vectorSearch.
### Note to LLM
- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields.
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.`
),
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\
`;

export const getAggregateArgs = (vectorSearchEnabled: boolean) =>
({
pipeline: z
.array(vectorSearchEnabled ? z.union([AnyAggregateStage, VectorSearchStage]) : AnyAggregateStage)
.describe(pipelineDescription),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pipeline description here is static and includes references to vector search - should we modify it so that we don't include them if the feature is disabled?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's certainly an option but I think that the description applies in a generic sense to the pipeline and I don't think it would be confusing for the LLMs. Or do you feel otherwise?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, most of the description is usage rules for the $vectorSearch and when vector search is disabled, we take in any ejson for the stages - I don't know if it'll be confusing to the LLM but generally, the way the description is worded, it does imply one could run a $vectorSearch query (which admittedly, they can, as long as they provide the embeddings).

responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
`),
};
}) as const;

export class AggregateTool extends MongoDBToolBase {
public name = "aggregate";
protected description = "Run an aggregation against a MongoDB collection";
protected argsShape = {
...DbOperationArgs,
...AggregateArgs,
...getAggregateArgs(this.isFeatureEnabled("vectorSearch")),
};
public operationType: OperationType = "read";

Expand Down
6 changes: 4 additions & 2 deletions src/tools/mongodb/read/export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type { OperationType, ToolArgs } from "../../tool.js";
import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js";
import { FindArgs } from "./find.js";
import { jsonExportFormat } from "../../../common/exportsManager.js";
import { AggregateArgs } from "./aggregate.js";
import { getAggregateArgs } from "./aggregate.js";

export class ExportTool extends MongoDBToolBase {
public name = "export";
Expand All @@ -32,7 +32,9 @@ export class ExportTool extends MongoDBToolBase {
name: z
.literal("aggregate")
.describe("The literal name 'aggregate' to represent an aggregation cursor as target."),
arguments: z.object(AggregateArgs).describe("The arguments for 'aggregate' operation."),
arguments: z
.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch")))
.describe("The arguments for 'aggregate' operation."),
}),
])
)
Expand Down
18 changes: 17 additions & 1 deletion tests/integration/tools/mongodb/create/createIndex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ import { ObjectId, type Collection, type Document, type IndexDirection } from "m
import { afterEach, beforeEach, describe, expect, it } from "vitest";

describeWithMongoDB("createIndex tool when search is not enabled", (integration) => {
validateToolMetadata(integration, "create-index", "Create an index for a collection", [
...databaseCollectionParameters,
{
name: "definition",
type: "array",
description: "The index definition. Use 'classic' for standard indexes.",
required: true,
},
{
name: "name",
type: "string",
description: "The name of the index",
required: false,
},
]);

it("doesn't allow creating vector search indexes", async () => {
expect(integration.mcpServer().userConfig.previewFeatures).to.not.include("vectorSearch");

Expand Down Expand Up @@ -99,7 +115,7 @@ describeWithMongoDB(
name: "definition",
type: "array",
description:
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes",
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes.",
required: true,
},
{
Expand Down
18 changes: 18 additions & 0 deletions tests/integration/tools/mongodb/create/insertMany.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ describeWithMongoDB(
await collection.drop();
});

validateToolMetadata(integration, "insert-many", "Insert an array of documents into a MongoDB collection", [
...databaseCollectionParameters,
{
name: "documents",
type: "array",
description:
"The array of documents to insert, matching the syntax of the document argument of db.collection.insertMany().",
required: true,
},
{
name: "embeddingParameters",
type: "object",
description:
"The embedding model and its parameters to use to generate embeddings for fields with vector search indexes. Note to LLM: If unsure which embedding model to use, ask the user before providing one.",
required: false,
},
]);

it("inserts a document when the embedding is correct", async () => {
await createVectorSearchIndexAndWait(integration.mongoClient(), database, "test", [
{
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/tools/mongodb/read/aggregate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ describeWithMongoDB("aggregate tool", (integration) => {
...databaseCollectionParameters,
{
name: "pipeline",
description: `An array of aggregation stages to execute.
description: `An array of aggregation stages to execute.
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
### Usage Rules for \`$vectorSearch\`
- **Unset embeddings:**
- **Unset embeddings:**
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
- **Pre-filtering:**
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
Expand Down
20 changes: 12 additions & 8 deletions tests/unit/common/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -686,14 +686,14 @@ describe("config", () => {

describe("CLI arguments", () => {
const referDocMessage =
"Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.";
"- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.";

type TestCase = { readonly cliArg: keyof (CliOptions & UserConfig); readonly warning: string };
const testCases = [
{
cliArg: "connectionString",
warning:
"The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.",
"Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.",
},
] as TestCase[];

Expand Down Expand Up @@ -742,9 +742,9 @@ describe("CLI arguments", () => {
{ warn, exit }
);

expect(warn).toHaveBeenCalledWith("Invalid command line argument 'wakanda'.");
expect(warn).toHaveBeenCalledWith("Warning: Invalid command line argument 'wakanda'.");
expect(warn).toHaveBeenCalledWith(
"Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
"- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
);
});

Expand All @@ -767,9 +767,11 @@ describe("CLI arguments", () => {
{ warn, exit }
);

expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonli'. Did you mean 'readOnly'?");
expect(warn).toHaveBeenCalledWith(
"Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
"Warning: Invalid command line argument 'readonli'. Did you mean 'readOnly'?"
);
expect(warn).toHaveBeenCalledWith(
"- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
);
});

Expand All @@ -781,9 +783,11 @@ describe("CLI arguments", () => {
{ warn, exit }
);

expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonly'. Did you mean 'readOnly'?");
expect(warn).toHaveBeenCalledWith(
"Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
"Warning: Invalid command line argument 'readonly'. Did you mean 'readOnly'?"
);
expect(warn).toHaveBeenCalledWith(
"- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."
);
});
});
Expand Down
Loading
Loading