From 2ac251f362e1af22273539be665aa347a0b5cbc0 Mon Sep 17 00:00:00 2001 From: peopleig Date: Tue, 14 Apr 2026 01:58:37 +0530 Subject: [PATCH 1/3] Update Python client to support batch insert and search --- client/python/USAGE.md | 60 +++++++ client/python/examples/async_usage.py | 2 +- client/python/examples/batch_insert_usage.py | 39 +++++ client/python/examples/search_query_usage.py | 69 ++++++++ client/python/pyproject.toml | 6 +- client/python/tests/test_client.py | 162 ++++++++++++------- client/python/vortexdb/__init__.py | 3 + client/python/vortexdb/client.py | 112 ++++++++----- client/python/vortexdb/models.py | 18 ++- client/python/vortexdb/protoutils.py | 73 ++++++--- 10 files changed, 422 insertions(+), 122 deletions(-) create mode 100644 client/python/examples/batch_insert_usage.py create mode 100644 client/python/examples/search_query_usage.py diff --git a/client/python/USAGE.md b/client/python/USAGE.md index 5a55402..372e3d2 100644 --- a/client/python/USAGE.md +++ b/client/python/USAGE.md @@ -56,6 +56,11 @@ async with AsyncVortexDB( payload=Payload.text("hello async vortex"), ) ``` +### Batch Insertion and Search Support + +The client now supports batch insertion and batch search queries. 
+Methods of usage and examples available in: +```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` --- @@ -115,6 +120,22 @@ Raises --- +#### **Batch Insert** + +Insert multiple vectors with payloads in a single request +``` +batch_insert(*, items: list[tuple[DenseVector, Payload]]) -> list[str] +``` + +Returns +- List of `point_id` (UUID string) + +Raises +- `TypeError` if input structure is invalid +- gRPC-mapped errors (see Error Handling) + +--- + #### **Get** Fetch a point by its ID @@ -149,6 +170,32 @@ Raises --- +#### **Batch Search** + +Search for nearest neighbours for multiple queries in a single request +``` +batch_search( + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, +) -> list[list[str]] +``` + +Raises +- `TypeError` for invalid query formats +- `ValueError` if required parameters are missing + +Supported Input Formats: +The `queries` parameter is flexible and supports multiple formats: +- List of `SearchQuery` objects +- List of `(DenseVector, Similarity, Limit)` tuples +- List of `(DenseVector, Similarity)` tuples with a global `Limit` +- List of `(DenseVector, Limit)` tuples with a global `Similarity` +- List of `DenseVector` with global `Similarity` and `Limit` + +--- + #### **Delete** Delete a point by its ID @@ -214,6 +261,19 @@ All fields are directly accessible: --- +### `SearchQuery` + +``` +SearchQuery( + vector: DenseVector, + similarity: Similarity, + limit: int, +) +``` +Structured representation of a search request + +--- + ### `Similarity` Enum representing distance functions: diff --git a/client/python/examples/async_usage.py b/client/python/examples/async_usage.py index cce3335..882a926 100644 --- a/client/python/examples/async_usage.py +++ b/client/python/examples/async_usage.py @@ -6,7 +6,7 @@ async def main(): async with AsyncVortexDB( grpc_url="localhost:50051", - api_key="your-api-key", + api_key="my-secret-password", ) as db: point_id = await db.insert( 
vector=DenseVector([0.1, 0.2, 0.3]), diff --git a/client/python/examples/batch_insert_usage.py b/client/python/examples/batch_insert_usage.py new file mode 100644 index 0000000..3c3aa6c --- /dev/null +++ b/client/python/examples/batch_insert_usage.py @@ -0,0 +1,39 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Payload, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + p1 = Payload.text("hello world") + p2 = Payload.image("/img/a.png") + p3 = Payload.text("foo bar") + + items = [ + (vectors[0], p1), + (vectors[1], p2), + (vectors[2], p3), + ] + + # Batch Insert + point_ids = db.batch_insert(items=items) + print("Inserted ids:\n", point_ids) + + for pid in point_ids: + db.delete(point_id=pid) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/examples/search_query_usage.py b/client/python/examples/search_query_usage.py new file mode 100644 index 0000000..097fa56 --- /dev/null +++ b/client/python/examples/search_query_usage.py @@ -0,0 +1,69 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Similarity, SearchQuery, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + q = SearchQuery( + vector=vectors[0], + similarity=Similarity.COSINE, + limit=3, + ) + res = db.search(query=q) + print("Single SearchQuery:\n", res) + + # List of SearchQuery + queries = [ + SearchQuery(vectors[0], Similarity.HAMMING, 3), + SearchQuery(vectors[1], Similarity.EUCLIDEAN, 2), + q, + ] + res = db.batch_search(queries=queries) + print("\nBatch SearchQuery:\n", res) + + # List of vectors with global Similarity and 
Limit + res = db.batch_search( + queries=vectors, + similarity=Similarity.COSINE, + limit=3, + ) + print("\nList of DenseVectors:\n", res) + + # List of tuple (DenseVector, Similarity) with global Limit + queries = [ + (vectors[0], Similarity.COSINE), + (vectors[1], Similarity.MANHATTAN), + ] + res = db.batch_search( + queries=queries, + limit=3, + ) + print("\nList of (DenseVector, Similarity):\n", res) + + # List of tuple (DenseVector, Limit) with global Similarity + queries = [ + (vectors[0], 2), + (vectors[1], 4), + ] + res = db.batch_search( + queries=queries, + similarity=Similarity.COSINE, + ) + print("\nList of (DenseVector, Limit):\n", res) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml index 8528f91..8e11aad 100644 --- a/client/python/pyproject.toml +++ b/client/python/pyproject.toml @@ -24,13 +24,13 @@ classifiers = [ ] dependencies = [ - "grpcio>=1.60", - "protobuf>=4.25", + "grpcio>=1.81.1", + "protobuf>=6.33.5,<7.0.0", ] [project.optional-dependencies] dev = [ - "grpcio-tools>=1.60", + "grpcio-tools>=1.81.1,<2.0.0", "pytest>=7.0", ] diff --git a/client/python/tests/test_client.py b/client/python/tests/test_client.py index a374959..8a6db58 100644 --- a/client/python/tests/test_client.py +++ b/client/python/tests/test_client.py @@ -5,6 +5,7 @@ from vortexdb.connection import GRPCConnection from vortexdb.models import DenseVector, Payload, Similarity, ContentType, Point from vortexdb.exceptions import InvalidArgumentError +from vortexdb.models import SearchQuery @@ -45,7 +46,6 @@ def test_insert_success(client, mock_connection): assert point_id == "point-123" - def test_insert_rejects_invalid_vector(client): with pytest.raises(TypeError): client.insert( @@ -54,32 +54,39 @@ def test_insert_rejects_invalid_vector(client): ) -def test_insert_batch_success(client, mock_connection): - mock_connection.call.return_value = Mock( - ids=[ - 
Mock(id=Mock(value="p1")), - Mock(id=Mock(value="p2")), - ] - ) +# Batch Insert - point_ids = client.insert_batch( - points=[ - (DenseVector([1, 2, 3]), Payload.text("hello")), - (DenseVector([4, 5, 6]), Payload.text("world")), - ] - ) - - assert point_ids == ["p1", "p2"] +def test_batch_insert_success(client, mock_connection): + response = Mock() + response.ids = [ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + mock_connection.call.return_value = response + items = [ + (DenseVector([1, 2, 3]), Payload.text("a")), + (DenseVector([4, 5, 6]), Payload.text("b")), + ] + result = client.batch_insert(items=items) + assert result == ["p1", "p2"] + +def test_batch_insert_invalid_items_type(client): + with pytest.raises(TypeError): + client.batch_insert(items="not-a-list") +def test_batch_insert_invalid_tuple_structure(client): + items = [ + (DenseVector([1, 2, 3]),), # only one element + ] + with pytest.raises(TypeError): + client.batch_insert(items=items) -def test_insert_batch_rejects_invalid_vector(client): +def test_batch_insert_invalid_vector(client): + items = [ + ([1, 2, 3], Payload.text("a")), # not DenseVector + ] with pytest.raises(TypeError): - client.insert_batch( - points=[ - (DenseVector([1, 2, 3]), Payload.text("hello")), - ([4, 5, 6], Payload.text("world")), - ] - ) + client.batch_insert(items=items) # Get @@ -160,37 +167,11 @@ def test_search_invalid_vector(client): ) -def test_search_batch_success(client, mock_connection): - mock_connection.call.return_value = Mock( - results=[ - Mock( - result_point_ids=[ - Mock(id=Mock(value="p1")), - Mock(id=Mock(value="p2")), - ] - ), - Mock( - result_point_ids=[ - Mock(id=Mock(value="p3")), - ] - ), - ] - ) - - results = client.search_batch( - queries=[ - (DenseVector([1, 2, 3]), Similarity.COSINE, 2), - (DenseVector([4, 5, 6]), Similarity.COSINE, 1), - ] - ) - - assert results == [["p1", "p2"], ["p3"]] - -def test_search_batch_accepts_ef(client, mock_connection): +def 
test_batch_search_accepts_ef(client, mock_connection): mock_connection.call.return_value = Mock(results=[]) - client.search_batch( + client.batch_search( queries=[ (DenseVector([1, 2, 3]), Similarity.COSINE, 2), (DenseVector([4, 5, 6]), Similarity.COSINE, 1), @@ -201,16 +182,79 @@ def test_search_batch_accepts_ef(client, mock_connection): request = mock_connection.call.call_args.args[1] assert [query.ef for query in request.queries] == [256, 256] +# Batch Search -def test_search_batch_rejects_invalid_vector(client): - with pytest.raises(TypeError): - client.search_batch( - queries=[ - (DenseVector([1, 2, 3]), Similarity.COSINE, 2), - ([4, 5, 6], Similarity.COSINE, 1), - ] - ) +def test_batch_search_full_tuple(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + Mock(result_point_ids=[Mock(id=Mock(value="p2"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.EUCLIDEAN, 1), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"], ["p2"]] + +def test_batch_search_vectors_with_global_params(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [DenseVector([1, 2, 3])] + result = client.batch_search( + queries=queries, + similarity=Similarity.MANHATTAN, + limit=2, + ) + assert result == [["p1"]] +def test_batch_search_vector_similarity_with_global_limit(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + result = client.batch_search( + queries=queries, + limit=2, + ) + assert result == [["p1"]] + +def test_batch_search_searchquery_objects(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + 
Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"]] + +def test_batch_search_missing_globals_for_vector(client): + queries = [DenseVector([1, 2, 3])] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_missing_limit(client): + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_invalid_format(client): + queries = ["invalid"] + with pytest.raises(TypeError): + client.batch_search(queries=queries) # Close diff --git a/client/python/vortexdb/__init__.py b/client/python/vortexdb/__init__.py index 9d9d694..3b80ddb 100644 --- a/client/python/vortexdb/__init__.py +++ b/client/python/vortexdb/__init__.py @@ -7,6 +7,8 @@ Payload, Point, Similarity, + SearchQuery, + to_dense_vectors, ) from vortexdb.exceptions import ( VortexDBError, @@ -25,6 +27,7 @@ "Payload", "Point", "Similarity", + "SearchQuery", "VortexDBError", "AuthenticationError", "NotFoundError", diff --git a/client/python/vortexdb/client.py b/client/python/vortexdb/client.py index 2104a61..682694c 100644 --- a/client/python/vortexdb/client.py +++ b/client/python/vortexdb/client.py @@ -7,11 +7,11 @@ Payload, Point, Similarity, + SearchQuery, ) from vortexdb import protoutils as proto - class VortexDB: """ High-level Python client for VortexDB """ @@ -52,27 +52,17 @@ def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value - def insert_batch( - self, - *, - points: Sequence[tuple[DenseVector, Payload]], - ) -> List[str]: + def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: """ - Insert multiple vectors with payloads. - Returns: List of point IDs + Insert multiple vectors. 
+ Returns: list of point_id (str) """ - for vector, _ in points: - self._validate_dense_vector(vector) - - request = proto.build_batch_insert_request( - points=list(points), - ) + request = proto.build_batch_insert_request(items=items) response = self._conn.call( self._conn.stub.InsertVectorsBatch, request, ) - return [pid.id.value for pid in response.ids] def get(self, *, point_id: str) -> Point | None: @@ -91,7 +81,6 @@ def get(self, *, point_id: str) -> Point | None: return Point.from_proto(response) - def delete(self, *, point_id: str) -> None: """ Delete a point by ID. @@ -106,16 +95,28 @@ def delete(self, *, point_id: str) -> None: def search( self, *, - vector: DenseVector, - similarity: Similarity, - limit: int, + vector: DenseVector | None = None, + similarity: Similarity | None = None, + limit: int | None = None, + query: SearchQuery | None = None, ef: int | None = None, ) -> List[str]: """ Search for nearest neighbors. Returns: List of point IDs """ - self._validate_dense_vector(vector) + if query is not None: + if not isinstance(query, SearchQuery): + raise TypeError("query must be a SearchQuery") + vector = query.vector + similarity = query.similarity + limit = query.limit + else: + self._validate_dense_vector(vector) + if not isinstance(similarity, Similarity): + raise TypeError("similarity must be a Similarity enum") + if not isinstance(limit, int): + raise TypeError("limit must be an int") request = proto.build_search_request( vector=vector, @@ -123,37 +124,68 @@ def search( limit=limit, ef=ef, ) - response = self._conn.call( self._conn.stub.SearchPoints, request, ) - return [pid.id.value for pid in response.result_point_ids] - def search_batch( + def batch_search( self, *, - queries: Sequence[tuple[DenseVector, Similarity, int]], + queries, + similarity: Similarity | None = None, + limit: int | None = None, ef: int | None = None, ) -> List[List[str]]: """ - Search nearest neighbors for multiple query vectors. 
- Returns: List of result point ID lists + Flexible batch search. + + Accepts: + - List[SearchQuery] + - List[(DenseVector, Similarity, int)] + - List[(DenseVector, Similarity)] + global limit + - List[(DenseVector, int)] + global similarity + - List[DenseVector] + global similarity + limit """ - for vector, _, _ in queries: - self._validate_dense_vector(vector) - - request = proto.build_batch_search_request( - queries=list(queries), - ef=ef, - ) - - response = self._conn.call( - self._conn.stub.SearchPointsBatch, - request, - ) - + normalized = [] + + for i, q in enumerate(queries): + if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + normalized.append((q.vector, q.similarity, q.limit)) + continue + + if isinstance(q, DenseVector): + if similarity is None or limit is None: + raise ValueError( + f"queries[{i}] requires global similarity and limit" + ) + normalized.append((q, similarity, limit)) + continue + + if isinstance(q, (list, tuple)): + if len(q) == 3: + normalized.append(q) + continue + if len(q) == 2: + a, b = q + + if isinstance(a, DenseVector) and isinstance(b, Similarity): + if limit is None: + raise ValueError(f"queries[{i}] missing global limit") + normalized.append((a, b, limit)) + continue + + if isinstance(a, DenseVector) and isinstance(b, int): + if similarity is None: + raise ValueError(f"queries[{i}] missing global similarity") + normalized.append((a, similarity, b)) + continue + + raise TypeError(f"Invalid query format at index {i}") + + request = proto.build_batch_search_request(queries=normalized, ef=ef) + response = self._conn.call(self._conn.stub.SearchPointsBatch, request) return [ [pid.id.value for pid in result.result_point_ids] for result in response.results @@ -179,4 +211,4 @@ def __enter__(self) -> "VortexDB": return self def __exit__(self, exc_type, exc, tb) -> None: - self.close() + self.close() \ No newline at end of file diff --git a/client/python/vortexdb/models.py b/client/python/vortexdb/models.py 
index f2cbe19..0bfab5f 100644 --- a/client/python/vortexdb/models.py +++ b/client/python/vortexdb/models.py @@ -70,7 +70,9 @@ def to_proto(self) -> vector_db_pb2.DenseVector: def to_list(self) -> list[float]: return list(self.values) - +# Helper: wrap each raw sequence of values in a DenseVector +def to_dense_vectors(arr): + return [DenseVector(x) for x in arr] @dataclass(frozen=True) @@ -129,3 +131,17 @@ def pretty(self) -> str: f" payload_type = {self.payload.content_type.name},\n" f" payload = '{self.payload.content}'" ) + +# Structured search query; avoids positional tuples, which get unwieldy as query fields are added +@dataclass(frozen=True) +class SearchQuery: + vector: DenseVector + similarity: Similarity + limit: int + + def to_proto(self) -> vector_db_pb2.SearchRequest: + return vector_db_pb2.SearchRequest( + query_vector=self.vector.to_proto(), + similarity=self.similarity.to_proto(), + limit=self.limit, + ) \ No newline at end of file diff --git a/client/python/vortexdb/protoutils.py b/client/python/vortexdb/protoutils.py index b0f73e5..d91306a 100644 --- a/client/python/vortexdb/protoutils.py +++ b/client/python/vortexdb/protoutils.py @@ -12,18 +12,35 @@ def build_insert_request( payload=payload.to_proto(), ) - def build_batch_insert_request( *, - points: list[tuple[DenseVector, Payload]], + items: list[tuple[DenseVector, Payload]], ) -> vector_db_pb2.InsertVectorsBatchRequest: - return vector_db_pb2.InsertVectorsBatchRequest( - vectors=[ - build_insert_request(vector=vector, payload=payload) - for vector, payload in points - ] - ) + if not isinstance(items, (list,tuple)): + raise TypeError("Items must be a list of (DenseVector, Payload) tuples") + + if not items: + raise ValueError("Items cannot be empty") + + requests = [] + for i, pair in enumerate(items): + if not isinstance(pair, (list,tuple)) or len(pair)!=2: + raise TypeError(f"items[{i}] must be a tuple of (DenseVector, Payload)") + + vector, payload = pair + if not isinstance(vector, DenseVector): + raise 
TypeError( + f"items[{i}][0] must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + + if not isinstance(payload, Payload): + raise TypeError(f"items[{i}][1] must be Payload") + + requests.append(build_insert_request(vector=vector, payload=payload)) + + return vector_db_pb2.InsertVectorsBatchRequest(vectors = requests) def build_point_id_request(point_id: str) -> vector_db_pb2.PointID: return vector_db_pb2.PointID( @@ -49,16 +66,36 @@ def build_search_request( def build_batch_search_request( *, queries: list[tuple[DenseVector, Similarity, int]], - ef: int | None = None, + ef: int | None=None, ) -> vector_db_pb2.SearchPointsBatchRequest: - return vector_db_pb2.SearchPointsBatchRequest( - queries=[ - build_search_request( - vector=vector, - similarity=similarity, + if not isinstance(queries, (list,tuple)): + raise TypeError("Queries must be a list of (DenseVector, Similarity, Limit (int)) tuples") + + if not queries: + raise ValueError("Queries cannot be empty") + + requests = [] + + for i, trio in enumerate(queries): + if not isinstance(trio, (list,tuple)) or len(trio)!=3: + raise TypeError(f"queries[{i}] must be a tuple of (DenseVector, Similarity, Limit(int))") + + vector, similarity, limit = trio + if not isinstance(vector, DenseVector): + raise TypeError( + f"queries[{i}][0] must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError(f"queries[{i}][1] must be Similarity") + if not isinstance(limit, int): + raise TypeError(f"queries[{i}][2] must be an integer value") + + requests.append(vector_db_pb2.SearchRequest( + query_vector=vector.to_proto(), + similarity=similarity.to_proto(), limit=limit, ef=ef, - ) - for vector, similarity, limit in queries - ] - ) + )) + + return vector_db_pb2.SearchPointsBatchRequest(queries=requests) \ No newline at end of file From a471138ba53fb8a24a6d4e7daaa7e1879c9279b9 Mon Sep 17 00:00:00 2001 From: peopleig Date: Sat, 13 Jun 2026 06:18:08 +0530 
Subject: [PATCH 2/3] Add batch support to the async client --- client/python/USAGE.md | 15 +- client/python/examples/async_batch_usage.py | 86 ++++++++ client/python/tests/test_async_client.py | 216 +++++++++++++++++++- client/python/vortexdb/async_client.py | 107 +++++++++- 4 files changed, 406 insertions(+), 18 deletions(-) create mode 100644 client/python/examples/async_batch_usage.py diff --git a/client/python/USAGE.md b/client/python/USAGE.md index 372e3d2..be2e5d5 100644 --- a/client/python/USAGE.md +++ b/client/python/USAGE.md @@ -41,10 +41,10 @@ Example available in: ### Async Client Support -For async applications, use `AsyncVortexDB`. It mirrors the synchronous client API and uses `grpc.aio` under the hood. +For async applications, use `AsyncVortexDB`. It mirrors the synchronous client API and uses `grpc.aio` under the hood, including full support for `batch_insert` and `batch_search`. -Example available in: -```examples/async_usage.py``` +Examples available in: +```examples/async_usage.py``` & ```examples/async_batch_usage.py``` ```python async with AsyncVortexDB( @@ -56,11 +56,12 @@ async with AsyncVortexDB( payload=Payload.text("hello async vortex"), ) ``` + ### Batch Insertion and Search Support -The client now supports batch insertion and batch search queries. +Both `VortexDB` and `AsyncVortexDB` support batch insertion and batch search queries. Methods of usage and examples available in: -```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` +```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` & ```examples/async_batch_usage.py``` --- @@ -76,8 +77,10 @@ Async client class for I/O-heavy applications. It has the same constructor and m ``` await db.insert(...) +await db.batch_insert(...) await db.get(...) await db.search(...) +await db.batch_search(...) await db.delete(...) 
await db.close() ``` @@ -371,4 +374,4 @@ python -m grpc_tools.protoc \ After running this: - `vector_db_pb2_grpc.py` and `vector_db_pb2.py` will be updated -- No other client code should need changes +- No other client code should need changes \ No newline at end of file diff --git a/client/python/examples/async_batch_usage.py b/client/python/examples/async_batch_usage.py new file mode 100644 index 0000000..bb93a72 --- /dev/null +++ b/client/python/examples/async_batch_usage.py @@ -0,0 +1,86 @@ +import asyncio + +from vortexdb import AsyncVortexDB +from vortexdb import DenseVector, Payload, Similarity, SearchQuery, to_dense_vectors + + +async def main(): + async with AsyncVortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) as db: + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + p1 = Payload.text("hello world") + p2 = Payload.image("/img/a.png") + p3 = Payload.text("foo bar") + + items = [ + (vectors[0], p1), + (vectors[1], p2), + (vectors[2], p3), + ] + + # Batch Insert + point_ids = await db.batch_insert(items=items) + print("Inserted ids:\n", point_ids) + + q = SearchQuery( + vector=vectors[0], + similarity=Similarity.COSINE, + limit=3, + ) + res = await db.search(query=q) + print("\nSingle SearchQuery:\n", res) + + # List of SearchQuery + queries = [ + SearchQuery(vectors[0], Similarity.HAMMING, 3), + SearchQuery(vectors[1], Similarity.EUCLIDEAN, 2), + q, + ] + res = await db.batch_search(queries=queries) + print("\nBatch SearchQuery:\n", res) + + # List of vectors with global Similarity and Limit + res = await db.batch_search( + queries=vectors, + similarity=Similarity.COSINE, + limit=3, + ) + print("\nList of DenseVectors:\n", res) + + # List of tuple (DenseVector, Similarity) with global Limit + queries = [ + (vectors[0], Similarity.COSINE), + (vectors[1], Similarity.MANHATTAN), + ] + res = await db.batch_search( + queries=queries, + limit=3, + ) + 
print("\nList of (DenseVector, Similarity):\n", res) + + # List of tuple (DenseVector, Limit) with global Similarity + queries = [ + (vectors[0], 2), + (vectors[1], 4), + ] + res = await db.batch_search( + queries=queries, + similarity=Similarity.COSINE, + ) + print("\nList of (DenseVector, Limit):\n", res) + + for pid in point_ids: + await db.delete(point_id=pid) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/client/python/tests/test_async_client.py b/client/python/tests/test_async_client.py index 1667633..878cb42 100644 --- a/client/python/tests/test_async_client.py +++ b/client/python/tests/test_async_client.py @@ -5,7 +5,7 @@ from vortexdb.async_client import AsyncVortexDB from vortexdb.async_connection import AsyncGRPCConnection -from vortexdb.models import ContentType, DenseVector, Payload, Point, Similarity +from vortexdb.models import ContentType, DenseVector, Payload, Point, Similarity, SearchQuery @pytest.fixture @@ -58,6 +58,59 @@ async def run(): asyncio.run(run()) +# Batch Insert + +def test_async_batch_insert_success(client, mock_connection): + async def run(): + response = Mock() + response.ids = [ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + mock_connection.call.return_value = response + + items = [ + (DenseVector([1, 2, 3]), Payload.text("a")), + (DenseVector([4, 5, 6]), Payload.text("b")), + ] + result = await client.batch_insert(items=items) + assert result == ["p1", "p2"] + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_items_type(client): + async def run(): + with pytest.raises(TypeError): + await client.batch_insert(items="not-a-list") + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_tuple_structure(client): + async def run(): + items = [ + (DenseVector([1, 2, 3]),), # only one element + ] + with pytest.raises(TypeError): + await client.batch_insert(items=items) + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_vector(client): + async 
def run(): + items = [ + ([1, 2, 3], Payload.text("a")), # not DenseVector + ] + with pytest.raises(TypeError): + await client.batch_insert(items=items) + + asyncio.run(run()) + + +# Get + def test_async_get_point_success(client, mock_connection): async def run(): proto_point = Mock() @@ -88,6 +141,8 @@ async def run(): asyncio.run(run()) +# Delete + def test_async_delete_success(client, mock_connection): async def run(): mock_connection.call.return_value = None @@ -99,6 +154,8 @@ async def run(): asyncio.run(run()) +# Search + def test_async_search_success(client, mock_connection): async def run(): mock_connection.call.return_value = Mock( @@ -119,6 +176,39 @@ async def run(): asyncio.run(run()) +def test_async_search_with_query_object(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + result_point_ids=[ + Mock(id=Mock(value="p1")), + ] + ) + + q = SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2) + results = await client.search(query=q) + + assert results == ["p1"] + + asyncio.run(run()) + + +def test_async_search_accepts_ef(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock(result_point_ids=[]) + + await client.search( + vector=DenseVector([1, 2, 3]), + similarity=Similarity.COSINE, + limit=2, + ef=128, + ) + + request = mock_connection.call.call_args.args[1] + assert request.ef == 128 + + asyncio.run(run()) + + def test_async_search_invalid_vector(client): async def run(): with pytest.raises(TypeError): @@ -131,6 +221,128 @@ async def run(): asyncio.run(run()) +# Batch Search + +def test_async_batch_search_full_tuple(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + Mock(result_point_ids=[Mock(id=Mock(value="p2"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.EUCLIDEAN, 1), + ] + result = await 
client.batch_search(queries=queries) + assert result == [["p1"], ["p2"]] + + asyncio.run(run()) + + +def test_async_batch_search_searchquery_objects(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2), + ] + result = await client.batch_search(queries=queries) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_vectors_with_global_params(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [DenseVector([1, 2, 3])] + result = await client.batch_search( + queries=queries, + similarity=Similarity.MANHATTAN, + limit=2, + ) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_vector_similarity_with_global_limit(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + result = await client.batch_search( + queries=queries, + limit=2, + ) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_accepts_ef(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock(results=[]) + + await client.batch_search( + queries=[ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.COSINE, 1), + ], + ef=256, + ) + + request = mock_connection.call.call_args.args[1] + assert [query.ef for query in request.queries] == [256, 256] + + asyncio.run(run()) + + +def test_async_batch_search_missing_globals_for_vector(client): + async def run(): + queries = [DenseVector([1, 2, 3])] + with pytest.raises(ValueError): + await client.batch_search(queries=queries) + + 
asyncio.run(run()) + + +def test_async_batch_search_missing_limit(client): + async def run(): + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + with pytest.raises(ValueError): + await client.batch_search(queries=queries) + + asyncio.run(run()) + + +def test_async_batch_search_invalid_format(client): + async def run(): + queries = ["invalid"] + with pytest.raises(TypeError): + await client.batch_search(queries=queries) + + asyncio.run(run()) + + +# Close / Context Manager + def test_async_close_closes_connection(client, mock_connection): async def run(): await client.close() @@ -155,4 +367,4 @@ async def run(): conn.close.assert_awaited_once() - asyncio.run(run()) + asyncio.run(run()) \ No newline at end of file diff --git a/client/python/vortexdb/async_client.py b/client/python/vortexdb/async_client.py index dd0d95c..3e6ac69 100644 --- a/client/python/vortexdb/async_client.py +++ b/client/python/vortexdb/async_client.py @@ -3,7 +3,7 @@ from vortexdb import protoutils as proto from vortexdb.async_connection import AsyncGRPCConnection from vortexdb.config import VortexDBConfig -from vortexdb.models import DenseVector, Payload, Point, Similarity +from vortexdb.models import DenseVector, Payload, Point, Similarity, SearchQuery class AsyncVortexDB: @@ -48,6 +48,20 @@ async def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value + async def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: + """ + Insert multiple vectors. + Returns: list of point_id (str) + """ + request = proto.build_batch_insert_request(items=items) + + response = await self._conn.call( + self._conn.stub.InsertVectorsBatch, + request, + ) + + return [pid.id.value for pid in response.ids] + async def get(self, *, point_id: str) -> Point | None: """ Retrieve a point by ID. 
@@ -78,20 +92,32 @@ async def delete(self, *, point_id: str) -> None: async def search( self, *, - vector: DenseVector, - similarity: Similarity, - limit: int, + vector: DenseVector | None = None, + similarity: Similarity | None = None, + limit: int | None = None, + query: SearchQuery | None = None, ef: int | None = None, ) -> List[str]: """ Search for nearest neighbors. Returns: List of point IDs """ - if not isinstance(vector, DenseVector): - raise TypeError( - "vector must be a DenseVector. " - "Use: DenseVector([1.0, 2.0, 3.0])" - ) + if query is not None: + if not isinstance(query, SearchQuery): + raise TypeError("query must be a SearchQuery") + vector = query.vector + similarity = query.similarity + limit = query.limit + else: + if not isinstance(vector, DenseVector): + raise TypeError( + "vector must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError("similarity must be a Similarity enum") + if not isinstance(limit, int): + raise TypeError("limit must be an int") request = proto.build_search_request( vector=vector, @@ -107,6 +133,67 @@ async def search( return [pid.id.value for pid in response.result_point_ids] + async def batch_search( + self, + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, + ef: int | None = None, + ) -> List[List[str]]: + """ + Flexible batch search. 
+ + Accepts: + - List[SearchQuery] + - List[(DenseVector, Similarity, int)] + - List[(DenseVector, Similarity)] + global limit + - List[(DenseVector, int)] + global similarity + - List[DenseVector] + global similarity + limit + """ + normalized = [] + + for i, q in enumerate(queries): + if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + normalized.append((q.vector, q.similarity, q.limit)) + continue + + if isinstance(q, DenseVector): + if similarity is None or limit is None: + raise ValueError( + f"queries[{i}] requires global similarity and limit" + ) + normalized.append((q, similarity, limit)) + continue + + if isinstance(q, (list, tuple)): + if len(q) == 3: + normalized.append(q) + continue + if len(q) == 2: + a, b = q + + if isinstance(a, DenseVector) and isinstance(b, Similarity): + if limit is None: + raise ValueError(f"queries[{i}] missing global limit") + normalized.append((a, b, limit)) + continue + + if isinstance(a, DenseVector) and isinstance(b, int): + if similarity is None: + raise ValueError(f"queries[{i}] missing global similarity") + normalized.append((a, similarity, b)) + continue + + raise TypeError(f"Invalid query format at index {i}") + + request = proto.build_batch_search_request(queries=normalized, ef=ef) + response = await self._conn.call(self._conn.stub.SearchPointsBatch, request) + return [ + [pid.id.value for pid in result.result_point_ids] + for result in response.results + ] + async def close(self) -> None: """ Close the async gRPC connection. 
@@ -117,4 +204,4 @@ async def __aenter__(self) -> "AsyncVortexDB": return self async def __aexit__(self, exc_type, exc, tb) -> None: - await self.close() + await self.close() \ No newline at end of file From c9cab731a3403bde483bc1530afe6b57d75f7d2a Mon Sep 17 00:00:00 2001 From: peopleig Date: Sat, 13 Jun 2026 06:29:33 +0530 Subject: [PATCH 3/3] Add master test script for all usage example files --- client/python/examples/all.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 client/python/examples/all.py diff --git a/client/python/examples/all.py b/client/python/examples/all.py new file mode 100644 index 0000000..44547e3 --- /dev/null +++ b/client/python/examples/all.py @@ -0,0 +1,30 @@ +# This file is like a master test. It runs all the examples. +# Not exactly the purpose of the examples dir, +# but it helps check that code updates haven't broken the API +  +from pathlib import Path +import subprocess +import pytest + +# Didn't know I could do this with pytest, so cool +# Just run: pytest ./all.py -v + +EXAMPLES_DIR = Path(__file__).parent +example_files = sorted(EXAMPLES_DIR.glob("*_usage.py")) + + +@pytest.mark.parametrize( + "script_path", + example_files, + ids=lambda p: p.stem, # or p.name +) +def test(script_path): + """Run all example scripts to check if they crash or not""" + result = subprocess.run( + ["python3", str(script_path)], + capture_output=True, + text=True + ) + assert result.returncode == 0, ( + f"Script {script_path} failed with stderr:\n{result.stderr}" + ) \ No newline at end of file