Skip to content

Commit 9c1e269

Browse files
committed
RDBC-889 7.0 Python client with Vector API + half of the tests
1 parent f9b2c95 commit 9c1e269

File tree

17 files changed

+634
-19
lines changed

17 files changed

+634
-19
lines changed

.github/workflows/RavenClient.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: tests/python
22

33
on:
44
push:
5-
branches: [v5.2]
5+
branches: [v7.0]
66
pull_request:
7-
branches: [v5.2]
7+
branches: [v7.0]
88
schedule:
99
- cron: '0 10 * * *'
1010
workflow_dispatch:
@@ -30,7 +30,7 @@ jobs:
3030
strategy:
3131
matrix:
3232
python-version: [ '3.8', '3.9', '3.10' ,'3.11', '3.12']
33-
serverVersion: [ '5.4', '6.2', '7.0' ]
33+
serverVersion: [ '7.0' ]
3434
fail-fast: false
3535

3636
steps:

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Install from [PyPi](https://pypi.python.org/pypi), as [ravendb](https://pypi.org
77
pip install ravendb
88
````
99
## Introduction and changelog
10-
Python client API (v5.2) for [RavenDB](https://ravendb.net/) , a NoSQL document database.
10+
Python client API (v7.0) for [RavenDB](https://ravendb.net/), a NoSQL document database.
1111

1212
Although new API isn't compatible with the previous one, it comes with **many improvements and new features**.
1313
@@ -17,8 +17,6 @@ Although new API isn't compatible with the previous one, it comes with **many im
1717
1818
## Releases
1919
20-
* All client versions 5.2.x are fully compatible with and support RavenDB server releases 5.4 and 6.0.
21-
2220
* [Click here](https://github.com/ravendb/ravendb-python-client/releases) to view all Releases and Changelog.
2321
2422
---

README_pypi.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pip install ravendb
88
```
99

1010
## Introduction
11-
Python client API (v5.2) for [RavenDB](https://ravendb.net/) , a NoSQL document database.
11+
Python client API (v7.0) for [RavenDB](https://ravendb.net/), a NoSQL document database.
1212

1313
Although new API isn't compatible with the previous one, it comes with **many improvements and new features**.
1414

@@ -19,8 +19,6 @@ Although new API isn't compatible with the previous one, it comes with **many im
1919

2020
## Releases
2121

22-
* All client versions 5.2.x are fully compatible with and support RavenDB server releases 5.4 and 6.0.
23-
2422
* [Click here](https://github.com/ravendb/ravendb-python-client/releases) to view all Releases and Changelog.
2523

2624
## What's new?

ravendb/documents/indexes/abstract_index_creation_tasks.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import abstractmethod, ABC
2-
from typing import Generic, TypeVar, Union, Dict, Set, Callable, Optional, List, TYPE_CHECKING
2+
from typing import Generic, TypeVar, Union, Dict, Set, Callable, Optional, List, TYPE_CHECKING, Any
33

44
from ravendb.documents.conventions import DocumentConventions
55
from ravendb.documents.indexes.definitions import (
@@ -18,6 +18,7 @@
1818
IndexType,
1919
)
2020
from ravendb.documents.indexes.spatial.configuration import SpatialOptionsFactory
21+
from ravendb.documents.indexes.vector.options import VectorOptions
2122
from ravendb.documents.operations.indexes import PutIndexesOperation
2223
from ravendb.documents.store.definition import DocumentStoreBase
2324
from ravendb.primitives import constants
@@ -88,6 +89,7 @@ def __init__(self):
8889
self._index_suggestions: Set[str] = set()
8990
self._term_vectors_strings: Dict[str, FieldTermVector] = {}
9091
self._spatial_options_strings: Dict[str, SpatialOptions] = {}
92+
self._vector_indexes_strings: Dict[str, VectorOptions] = {}
9193

9294
self._output_reduce_to_collection: Union[None, str] = None
9395
self._pattern_for_output_reduce_to_collection_references: Union[None, str] = None
@@ -150,6 +152,9 @@ def _add_assembly(self, assembly: AdditionalAssembly) -> None:
150152

151153
self.additional_assemblies.add(assembly)
152154

155+
def _vector(self, field: str, vector_options: VectorOptions) -> None:
    """Store ``vector_options`` under ``field`` in ``self._vector_indexes_strings``.

    Any options previously stored for the same field name are replaced.
    """
    self._vector_indexes_strings[field] = vector_options
157+
153158

154159
class AbstractIndexDefinitionBuilder(Generic[_T_IndexDefinition]):
155160
def __init__(self, index_name: str):
@@ -165,6 +170,7 @@ def __init__(self, index_name: str):
165170
self.suggestions_options: Set[str] = set()
166171
self.term_vectors_strings: Dict[str, FieldTermVector] = {}
167172
self.spatial_indexes_strings: Dict[str, SpatialOptions] = {}
173+
self.vector_indexes_strings: Dict[str, VectorOptions] = {}
168174

169175
self.lock_mode: Optional[IndexLockMode] = None
170176
self.priority: Optional[IndexLockMode] = None
@@ -191,7 +197,7 @@ def __apply_values(
191197
self,
192198
index_definition: IndexDefinition,
193199
values: Dict[str, object],
194-
action: Callable[[IndexFieldOptions, object], None],
200+
action: Callable[[IndexFieldOptions, Any], None],
195201
) -> None:
196202
for key, value in values.items():
197203
field = index_definition.fields.get(key, IndexFieldOptions())
@@ -216,29 +222,33 @@ def to_index_definition(self, conventions: DocumentConventions, validate_map: bo
216222
for suggestions_option in self.suggestions_options:
217223
suggestions[suggestions_option] = True
218224

219-
def __set_indexing(options, value):
225+
def __set_indexing(options: IndexFieldOptions, value: FieldIndexing):
220226
options.indexing = value
221227

222-
def __set_storage(options, value):
228+
def __set_storage(options: IndexFieldOptions, value: FieldStorage):
223229
options.storage = value
224230

225-
def __set_analyzer(options, value):
231+
def __set_analyzer(options: IndexFieldOptions, value: str):
226232
options.analyzer = value
227233

228-
def __set_term_vector(options, value):
234+
def __set_term_vector(options: IndexFieldOptions, value: FieldTermVector):
229235
options.term_vector = value
230236

231-
def __set_spatial(options, value):
237+
def __set_spatial(options: IndexFieldOptions, value: SpatialOptions):
232238
options.spatial = value
233239

234-
def __set_suggestions(options, value):
240+
def __set_vector(options: IndexFieldOptions, value: VectorOptions):
241+
options.vector = value
242+
243+
def __set_suggestions(options: IndexFieldOptions, value: bool):
235244
options.suggestions = value
236245

237246
self.__apply_values(index_definition, self.indexes_strings, __set_indexing)
238247
self.__apply_values(index_definition, self.stores_strings, __set_storage)
239248
self.__apply_values(index_definition, self.analyzers_strings, __set_analyzer)
240249
self.__apply_values(index_definition, self.term_vectors_strings, __set_term_vector)
241250
self.__apply_values(index_definition, self.spatial_indexes_strings, __set_spatial)
251+
self.__apply_values(index_definition, self.vector_indexes_strings, __set_vector)
242252
self.__apply_values(index_definition, suggestions, __set_suggestions)
243253

244254
index_definition.additional_sources = self.additional_sources
@@ -302,6 +312,7 @@ def create_index_definition(self) -> IndexDefinition:
302312
index_definition_builder.suggestions_options = self._index_suggestions
303313
index_definition_builder.term_vectors_strings = self._term_vectors_strings
304314
index_definition_builder.spatial_indexes_strings = self._spatial_options_strings
315+
index_definition_builder.vector_indexes_strings = self._vector_indexes_strings
305316
index_definition_builder.output_reduce_to_collection = self._output_reduce_to_collection
306317
index_definition_builder.pattern_for_output_reduce_to_collection_references = (
307318
self._pattern_for_output_reduce_to_collection_references

ravendb/documents/indexes/definitions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from abc import ABC
77
from typing import Union, Optional, List, Dict, Set, Iterable
88
from ravendb.documents.indexes.spatial.configuration import SpatialOptions, AutoSpatialOptions
9+
from ravendb.documents.indexes.vector.options import VectorOptions, AutoVectorOptions
910
from ravendb.tools.utils import Utils
1011

1112

@@ -144,13 +145,15 @@ def __init__(
144145
indexing: Optional[FieldIndexing] = None,
145146
term_vector: Optional[FieldTermVector] = None,
146147
spatial: Optional[SpatialOptions] = None,
148+
vector: Optional[VectorOptions] = None,
147149
analyzer: Optional[str] = None,
148150
suggestions: Optional[bool] = None,
149151
):
150152
self.storage = storage
151153
self.indexing = indexing
152154
self.term_vector = term_vector
153155
self.spatial = spatial
156+
self.vector = vector
154157
self.analyzer = analyzer
155158
self.suggestions = suggestions
156159

@@ -160,6 +163,7 @@ def to_json(self):
160163
"Indexing": self.indexing,
161164
"TermVector": self.term_vector,
162165
"Spatial": self.spatial.to_json() if self.spatial else None,
166+
"Vector": self.vector.to_json() if self.vector else None,
163167
"Analyzer": self.analyzer,
164168
"Suggestions": self.suggestions,
165169
}
@@ -370,6 +374,7 @@ def __init__(
370374
indexing: Optional[AutoFieldIndexing] = None,
371375
aggregation: Optional[AggregationOperation] = None,
372376
spatial: Optional[AutoSpatialOptions] = None,
377+
vector: Optional[AutoVectorOptions] = None,
373378
group_by_array_behavior: Optional[GroupByArrayBehavior] = None,
374379
suggestions: Optional[bool] = None,
375380
is_name_quoted: Optional[bool] = None,
@@ -378,6 +383,7 @@ def __init__(
378383
self.indexing = indexing
379384
self.aggregation = aggregation
380385
self.spatial = spatial
386+
self.vector = vector
381387
self.group_by_array_behavior = group_by_array_behavior
382388
self.suggestions = suggestions
383389
self.is_name_quoted = is_name_quoted
@@ -389,6 +395,7 @@ def from_json(cls, json_dict: Dict) -> AutoIndexFieldOptions:
389395
AutoFieldIndexing(json_dict.get("Indexing")),
390396
AggregationOperation(json_dict.get("Aggregation")) if json_dict.get("Aggregation", None) else None,
391397
AutoSpatialOptions.from_json(json_dict.get("Spatial")) if json_dict.get("Spatial", None) else None,
398+
AutoVectorOptions.from_json(json_dict.get("Vector")) if json_dict.get("Vector", None) else None,
392399
GroupByArrayBehavior(json_dict.get("GroupByArrayBehavior")),
393400
json_dict.get("Suggestions"),
394401
json_dict.get("IsNameQuoted"),
@@ -400,6 +407,9 @@ def to_json(self) -> Dict:
400407
"Indexing": self.indexing.value,
401408
"Aggregation": self.aggregation.value if self.aggregation is not None else None,
402409
"Spatial": self.spatial.type if self.spatial is not None else None,
410+
"Vector": (
411+
self.vector.to_json() if self.vector is not None else None
412+
), # todo; check if vector.to_json() is valid here
403413
"GroupByArrayBehavior": self.group_by_array_behavior.value,
404414
"Suggestions": self.suggestions,
405415
"IsNameQuoted": self.is_name_quoted,

ravendb/documents/indexes/vector/__init__.py

Whitespace-only changes.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from enum import Enum
2+
3+
4+
class VectorEmbeddingType(Enum):
    """Kinds of embedding a vector field can hold.

    Each member's value is the string form used when the type is serialized.
    """

    # Floating-point components.
    SINGLE = "Single"
    # Components quantized to integers.
    INT8 = "Int8"
    # Components quantized to single 1/0 bits.
    BINARY = "Binary"
    # Plain text (str).
    TEXT = "Text"
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from __future__ import annotations
2+
3+
from typing import Dict, Any
4+
5+
from ravendb.documents.indexes.vector.embedding import VectorEmbeddingType
6+
from ravendb.primitives import constants
7+
8+
9+
class VectorOptions:
10+
def __init__(
11+
self,
12+
source_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
13+
destination_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
14+
dimensions: int = None,
15+
number_of_edges: int = None,
16+
number_of_candidates_for_indexing: int = None,
17+
):
18+
self.dimensions = dimensions
19+
self.source_embedding_type = source_embedding_type
20+
self.destination_embedding_type = destination_embedding_type
21+
self.numbers_of_candidates_for_indexing = number_of_candidates_for_indexing
22+
self.number_of_edges = number_of_edges
23+
24+
@classmethod
25+
def from_json(cls, json_dict: Dict[str, Any]) -> VectorOptions:
26+
return cls(
27+
json_dict["SourceEmbeddingType"],
28+
json_dict["DestinationEmbeddingType"],
29+
json_dict["Dimensions"],
30+
json_dict["NumberOfEdges"],
31+
json_dict["NumberOfCandidatesForIndexing"],
32+
)
33+
34+
def to_json(self) -> Dict[str, Any]:
35+
return {
36+
"SourceEmbeddingType": self.source_embedding_type.value,
37+
"DestinationEmbeddingType": self.destination_embedding_type.value,
38+
"Dimensions": self.dimensions,
39+
"NumberOfCandidatesForIndexing": self.numbers_of_candidates_for_indexing,
40+
"NumberOfEdges": self.number_of_edges,
41+
}
42+
43+
44+
class AutoVectorOptions(VectorOptions):
45+
def __init__(
46+
self,
47+
source_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
48+
destination_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
49+
dimensions: int = None,
50+
number_of_edges: int = None,
51+
number_of_candidates_for_indexing: int = None,
52+
source_field_name: str = None,
53+
):
54+
super().__init__(
55+
source_embedding_type,
56+
destination_embedding_type,
57+
dimensions,
58+
number_of_edges,
59+
number_of_candidates_for_indexing,
60+
)
61+
self.source_field_name = source_field_name
62+
63+
@classmethod
64+
def from_vector_options(cls, vector_options: VectorOptions):
65+
return cls(
66+
source_embedding_type=vector_options.source_embedding_type,
67+
destination_embedding_type=vector_options.destination_embedding_type,
68+
dimensions=vector_options.dimensions,
69+
number_of_edges=vector_options.number_of_edges,
70+
number_of_candidates_for_indexing=vector_options.numbers_of_candidates_for_indexing,
71+
)
72+
73+
@classmethod
74+
def from_json(cls, json_dict: Dict[str, Any]):
75+
vec_options = super().from_json(json_dict)
76+
auto_vect_options = cls.from_vector_options(vec_options)
77+
auto_vect_options.source_field_name = json_dict["SourceFieldName"]
78+
return auto_vect_options
79+
80+
def to_json(self) -> Dict[str, Any]:
81+
json_dict = super().to_json()
82+
json_dict["SourceFieldName"] = self.source_field_name
83+
return json_dict
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import struct
2+
from typing import List, Tuple
3+
4+
5+
class VectorQuantizer:
6+
@staticmethod
7+
def to_int8(raw_embedding: List[float]) -> bytes:
8+
"""
9+
Converts a list of floats to a packed byte array of signed 8-bit integers (int8).
10+
The maximum absolute value is appended as a 4-byte float at the end.
11+
12+
Args:
13+
raw_embedding (List[float]): List of floating-point numbers to be quantized.
14+
15+
Returns:
16+
bytes: Packed byte array containing the quantized int8 values and the max component.
17+
"""
18+
if not raw_embedding:
19+
return b""
20+
21+
# Find the maximum absolute value in the input array
22+
max_component: float = max(abs(x) for x in raw_embedding)
23+
24+
# If all elements are zero, set quantized to all zeros
25+
if max_component == 0:
26+
quantized: List[int] = [0] * len(raw_embedding)
27+
else:
28+
# Scale all elements to the range [-127, 127]
29+
scale_factor: float = 127.0 / max_component
30+
quantized: List[int] = [int(x * scale_factor) for x in raw_embedding]
31+
32+
# Pack the quantized values into signed bytes (int8)
33+
packed: bytes = struct.pack("b" * len(quantized), *quantized)
34+
35+
# Append the max_component as a little-endian float
36+
packed += struct.pack("<f", max_component)
37+
38+
return packed
39+
40+
@staticmethod
41+
def to_int1(raw_embedding: List[float]) -> bytes:
42+
"""
43+
Converts a list of floats to a packed byte array of binary values (int1).
44+
Each byte represents 8 consecutive float values, where each bit corresponds to
45+
whether the float is non-negative (1) or negative (0).
46+
47+
Args:
48+
raw_embedding (List[float]): List of floating-point numbers to be quantized.
49+
50+
Returns:
51+
bytes: Packed byte array containing the binary-packed values.
52+
"""
53+
# Calculate the number of bytes needed to store the binary-packed values
54+
output_length: int = (len(raw_embedding) + 7) // 8
55+
56+
# Initialize a bytearray to store the packed bits
57+
bytes_list: bytearray = bytearray(output_length)
58+
59+
# Iterate over each float value and pack it into the appropriate bit
60+
for i, val in enumerate(raw_embedding):
61+
if val >= 0:
62+
byte_index: int = i // 8 # Determine which byte to modify
63+
bit_pos: int = 7 - (i % 8) # Determine the bit position within the byte
64+
bytes_list[byte_index] |= 1 << bit_pos # Set the bit to 1 if the value is non-negative
65+
66+
return bytes(bytes_list)

0 commit comments

Comments
 (0)