Skip to content

Commit cce6df2

Browse files
barnuri and Copilot authored
INTPYTHON-832 Add search_by_vector method to VectorStore (#266)
This pull request adds a new method to the `VectorStore` class in `vectorstores.py`, enhancing its functionality to support similarity search directly by vector input.

**New functionality:**

* Added the `similarity_search_by_vector` method to the `VectorStore` class, allowing users to perform similarity searches using a query vector and returning relevant documents.

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: barnuri <13019522+barnuri@users.noreply.github.com>
1 parent ffcf7ed commit cce6df2

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

libs/langchain-mongodb/langchain_mongodb/vectorstores.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,3 +870,36 @@ def create_vector_search_index(
870870
wait_until_complete=wait_until_complete,
871871
**kwargs,
872872
) # type: ignore [operator]
873+
874+
def similarity_search_by_vector(
    self,
    embedding: list[float],
    k: int = 4,
    **kwargs: Any,
) -> list[Document]:
    """Find the stored documents closest to an already-computed query vector.

    This is a thin wrapper: the search itself is performed by
    ``self._similarity_search_with_score`` and the scores it returns are
    discarded, leaving only the documents.

    Args:
        embedding: Embedding vector to search for.
        k: (Optional) number of documents to return. Defaults to 4.
        **kwargs: Forwarded unchanged to ``_similarity_search_with_score``.
            Options callers commonly pass this way:

            * pre_filter: MQL match expression(s) comparing an indexed field.
            * post_filter_pipeline: (Optional) pipeline of MongoDB
              aggregation stages to filter/process results after
              $vectorSearch.
            * oversampling_factor: Multiple of k used when generating the
              number of candidates at each step in the HNSW Vector Search.
            * include_embeddings: If True, the embedding vector of each
              result will be included in metadata.

    Returns:
        List of documents most similar to the query vector, in the order
        produced by the underlying scored search.
    """
    scored_hits = self._similarity_search_with_score(embedding, k=k, **kwargs)

    # Each hit is a (document, score) pair; callers of this method only
    # want the documents.
    documents = []
    for document, _score in scored_hits:
        documents.append(document)
    return documents

libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,41 @@ def test_search_pre_filter(
117117
"Sandwich", k=3, pre_filter={"c": {"$gt": 0}}
118118
)
119119
assert len(matches_filter) == 1
120+
121+
122+
def test_similarity_search_by_vector(
    vectorstore: PatchedMongoDBAtlasVectorSearch,
    embeddings: Embeddings,
    texts: List[str],
) -> None:
    """Searching by a raw query vector returns ``k`` document-like results."""
    requested = 2

    # Build the query vector by embedding a known text.
    sandwich_vector = embeddings.embed_query("Sandwich")

    hits = vectorstore.similarity_search_by_vector(sandwich_vector, k=requested)

    # Exactly the requested number of results comes back.
    assert len(hits) == requested

    # Every result exposes the Document attributes.
    for hit in hits:
        assert hasattr(hit, "page_content")
    for hit in hits:
        assert hasattr(hit, "metadata")
140+
141+
142+
def test_similarity_search_by_vector_with_filter(
    vectorstore: PatchedMongoDBAtlasVectorSearch,
    embeddings: Embeddings,
) -> None:
    """A ``pre_filter`` passed to the by-vector search narrows the results."""
    sandwich_vector = embeddings.embed_query("Sandwich")

    # Only documents whose "c" field is greater than zero may match.
    positive_c = {"c": {"$gt": 0}}
    hits = vectorstore.similarity_search_by_vector(
        sandwich_vector,
        k=3,
        pre_filter=positive_c,
    )

    # Although k=3 was requested, only one document satisfies the filter.
    assert len(hits) == 1
    assert "c" in hits[0].metadata

0 commit comments

Comments
 (0)