INTPYTHON-832 Add similarity_search_by_vector method to VectorStore (#266)

barnuri · Copilot · web-flow · commit cce6df213514 · 2025-11-24T15:56:06.000-06:00
This pull request adds a new method to the `VectorStore` class in
`vectorstores.py`, enhancing its functionality to support similarity
search directly by vector input.

**New functionality:**

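* Added the `similarity_search_by_vector` method to the `VectorStore`
class. It accepts a query embedding vector directly, so callers that
already hold a vector can search without embedding text first, and it
returns the most similar documents as a list.
* Added integration tests covering a plain search by vector and a search
by vector combined with `pre_filter`.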

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: barnuri <13019522+barnuri@users.noreply.github.com>
diff --git a/libs/langchain-mongodb/langchain_mongodb/vectorstores.py b/libs/langchain-mongodb/langchain_mongodb/vectorstores.py
@@ -870,3 +870,36 @@ def create_vector_search_index(
             wait_until_complete=wait_until_complete,
             **kwargs,
         )  # type: ignore [operator]
+
+    def similarity_search_by_vector(
+        self,
+        embedding: list[float],
+        k: int = 4,
+        **kwargs: Any,
+    ) -> list[Document]:
+        """Return the documents most similar to a precomputed query vector.
+
+        Delegates to ``_similarity_search_with_score`` and keeps only the
+        first item (the document) of each pair it returns.
+
+        Args:
+            embedding: Embedding vector to search for.
+            k: (Optional) number of documents to return. Defaults to 4.
+            pre_filter: (Optional) MQL match expression(s) comparing an indexed field.
+            post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
+                to filter/process results after $vectorSearch.
+            oversampling_factor: Multiple of k used when generating number of candidates
+                at each step in the HNSW Vector Search.
+            include_embeddings: If True, the embedding vector of each result
+                will be included in metadata.
+            kwargs: Forwarded unchanged; carries the optional settings listed above.
+
+        Returns:
+            List of documents most similar to the query vector.
+        """
+        tuple_list = self._similarity_search_with_score(
+            embedding,
+            k=k,
+            **kwargs,
+        )
+        return [doc for doc, _ in tuple_list]
diff --git a/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py b/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py
@@ -117,3 +117,41 @@ def test_search_pre_filter(
         "Sandwich", k=3, pre_filter={"c": {"$gt": 0}}
     )
     assert len(matches_filter) == 1
+
+
+def test_similarity_search_by_vector(
+    vectorstore: PatchedMongoDBAtlasVectorSearch,
+    embeddings: Embeddings,
+    texts: List[str],  # not used below; presumably a fixture dependency (confirm)
+) -> None:
+    # An unfiltered similarity_search_by_vector call should return k documents.
+    # The method takes a vector rather than text, so embed the query here first.
+    query_text = "Sandwich"
+    query_vector = embeddings.embed_query(query_text)
+
+    # Search with the precomputed vector, asking for the top 2 matches.
+    output = vectorstore.similarity_search_by_vector(query_vector, k=2)
+
+    # Exactly k=2 results expected (assumes the fixture holds >= 2 documents).
+    assert len(output) == 2
+    # Duck-type check only: each result exposes the two Document attributes.
+    assert all(hasattr(doc, "page_content") for doc in output)
+    assert all(hasattr(doc, "metadata") for doc in output)
+
+
+def test_similarity_search_by_vector_with_filter(
+    vectorstore: PatchedMongoDBAtlasVectorSearch,
+    embeddings: Embeddings,
+) -> None:
+    # A pre_filter passed as a keyword argument should restrict the vector search.
+    query_text = "Sandwich"
+    query_vector = embeddings.embed_query(query_text)
+
+    # Ask for up to 3 matches, limited to documents whose field "c" is > 0.
+    filtered_output = vectorstore.similarity_search_by_vector(
+        query_vector, k=3, pre_filter={"c": {"$gt": 0}}
+    )
+
+    # Same expectation as test_search_pre_filter: one match (fixture data not shown).
+    assert len(filtered_output) == 1
+    assert "c" in filtered_output[0].metadata