Skip to content

Commit 4a2abb0

Browse files
committed
RDBC-934 Vertex & Overlap tokens
1 parent 4077dd4 commit 4a2abb0

File tree

5 files changed

+151
-2
lines changed

5 files changed

+151
-2
lines changed

ravendb/documents/operations/ai/ai_connection_string.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from ravendb.documents.operations.ai.mistral_ai_settings import MistralAiSettings
1010
from ravendb.documents.operations.ai.ollama_settings import OllamaSettings
1111
from ravendb.documents.operations.ai.open_ai_settings import OpenAiSettings
12+
from ravendb.documents.operations.ai.vertex_settings import VertexSettings
13+
1214
from ravendb.documents.operations.connection_strings import ConnectionString
1315

1416

@@ -29,6 +31,7 @@ def __init__(
2931
google_settings: Optional[GoogleSettings] = None,
3032
huggingface_settings: Optional[HuggingFaceSettings] = None,
3133
mistral_ai_settings: Optional[MistralAiSettings] = None,
34+
vertex_settings: Optional[VertexSettings] = None,
3235
model_type: AiModelType = None,
3336
):
3437
super().__init__(name)
@@ -40,6 +43,7 @@ def __init__(
4043
self.google_settings = google_settings
4144
self.huggingface_settings = huggingface_settings
4245
self.mistral_ai_settings = mistral_ai_settings
46+
self.vertex_settings = vertex_settings
4347
self.model_type = model_type
4448

4549
if not any(
@@ -51,10 +55,11 @@ def __init__(
5155
google_settings,
5256
huggingface_settings,
5357
mistral_ai_settings,
58+
vertex_settings,
5459
]
5560
):
5661
raise ValueError(
57-
"Please provide at least one of the following settings: openai_settings, azure_openai_settings, ollama_settings, embedded_settings, google_settings, huggingface_settings, mistral_ai_settings"
62+
"Please provide at least one of the following settings: openai_settings, azure_openai_settings, ollama_settings, embedded_settings, google_settings, huggingface_settings, mistral_ai_settings, vertex_settings"
5863
)
5964

6065
if model_type is None:
@@ -69,12 +74,13 @@ def __init__(
6974
google_settings,
7075
huggingface_settings,
7176
mistral_ai_settings,
77+
vertex_settings,
7278
]:
7379
if setting:
7480
settings_set_count += 1 if setting else 0
7581
if settings_set_count > 1:
7682
raise ValueError(
77-
"Please provide only one of the following settings: openai_settings, azure_openai_settings, ollama_settings, embedded_settings, google_settings, huggingface_settings, mistral_ai_settings"
83+
"Please provide only one of the following settings: openai_settings, azure_openai_settings, ollama_settings, embedded_settings, google_settings, huggingface_settings, mistral_ai_settings, vertex_settings"
7884
)
7985

8086
@property
@@ -92,6 +98,7 @@ def to_json(self) -> Dict[str, Any]:
9298
"GoogleSettings": self.google_settings.to_json() if self.google_settings else None,
9399
"HuggingFaceSettings": self.huggingface_settings.to_json() if self.huggingface_settings else None,
94100
"MistralAiSettings": self.mistral_ai_settings.to_json() if self.mistral_ai_settings else None,
101+
"VertexSettings": self.vertex_settings.to_json() if self.vertex_settings else None,
95102
"ModelType": self.model_type.value if self.model_type else None,
96103
"Type": self.get_type,
97104
}
@@ -128,5 +135,8 @@ def from_json(cls, json_dict: Dict[str, Any]) -> "AiConnectionString":
128135
if json_dict.get("MistralAiSettings")
129136
else None
130137
),
138+
vertex_settings=(
139+
VertexSettings.from_json(json_dict["VertexSettings"]) if json_dict.get("VertexSettings") else None
140+
),
131141
model_type=AiModelType(json_dict["ModelType"]) if json_dict.get("ModelType") else None,
132142
)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from enum import Enum
2+
from typing import Dict, Any, Optional
3+
4+
5+
# todo: EmbeddingsGenerationConfiguration
6+
class ChunkingMethod(Enum):
    """Closed set of chunking method identifiers.

    Each member's value is the string that ``ChunkingOptions`` writes under
    (and parses from) the ``"ChunkingMethod"`` JSON key.
    """

    # Plain-text variants
    PLAIN_TEXT_SPLIT = "PlainTextSplit"
    PLAIN_TEXT_SPLIT_LINES = "PlainTextSplitLines"
    PLAIN_TEXT_SPLIT_PARAGRAPHS = "PlainTextSplitParagraphs"

    # Markdown variants
    MARK_DOWN_SPLIT_LINES = "MarkDownSplitLines"
    MARK_DOWN_SPLIT_PARAGRAPHS = "MarkDownSplitParagraphs"

    # HTML variant
    HTML_STRIP = "HtmlStrip"
13+
14+
15+
class ChunkingOptions:
    """Chunking settings: a method, a per-chunk token cap and an overlap-token count.

    Instances convert to and from a dictionary keyed by ``"ChunkingMethod"``,
    ``"MaxTokensPerChunk"`` and ``"OverlapTokens"``.
    """

    def __init__(
        self,
        chunking_method: Optional[ChunkingMethod] = None,
        max_tokens_per_chunk: int = 512,
        overlap_tokens: int = 0,
    ):
        """Store the given values as attributes; no validation is performed.

        Args:
            chunking_method: The chunking method, or ``None`` when unset.
            max_tokens_per_chunk: Token cap per chunk (defaults to 512).
            overlap_tokens: Overlap-token count (defaults to 0).
        """
        self.chunking_method = chunking_method
        self.max_tokens_per_chunk = max_tokens_per_chunk
        self.overlap_tokens = overlap_tokens

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> "ChunkingOptions":
        """Build an instance from its dictionary form.

        A missing or falsy ``"ChunkingMethod"`` entry yields ``None``; missing
        numeric keys fall back to the constructor defaults (512 and 0).

        Raises:
            ValueError: If ``"ChunkingMethod"`` holds a string that is not a
                ``ChunkingMethod`` value.
        """
        raw_method = json_dict.get("ChunkingMethod")
        if raw_method:
            method = ChunkingMethod(raw_method)
        else:
            method = None

        max_tokens = json_dict.get("MaxTokensPerChunk", 512)
        overlap = json_dict.get("OverlapTokens", 0)
        return cls(
            chunking_method=method,
            max_tokens_per_chunk=max_tokens,
            overlap_tokens=overlap,
        )

    def to_json(self) -> Dict[str, Any]:
        """Return the dictionary form; the method is emitted as its string value or ``None``."""
        method = self.chunking_method
        payload: Dict[str, Any] = {"ChunkingMethod": method.value if method else None}
        payload["MaxTokensPerChunk"] = self.max_tokens_per_chunk
        payload["OverlapTokens"] = self.overlap_tokens
        return payload
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from typing import Dict, Any, Optional
2+
3+
from ravendb.documents.operations.ai.chunking_options import ChunkingOptions
4+
5+
6+
# todo: EmbeddingsGenerationConfiguration
7+
class EmbeddingPathConfiguration:
    """Pairs a path string with optional chunking options.

    Converts to and from a dictionary keyed by ``"Path"`` and
    ``"ChunkingOptions"``.
    """

    def __init__(self, path: Optional[str] = None, chunking_options: Optional[ChunkingOptions] = None):
        """Store ``path`` and ``chunking_options`` as attributes without validation."""
        self.path = path
        self.chunking_options = chunking_options

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> "EmbeddingPathConfiguration":
        """Build an instance from its dictionary form.

        A missing or falsy ``"ChunkingOptions"`` entry results in
        ``chunking_options`` being ``None``; otherwise it is parsed with
        ``ChunkingOptions.from_json``.
        """
        raw_options = json_dict.get("ChunkingOptions")
        if raw_options:
            options = ChunkingOptions.from_json(raw_options)
        else:
            options = None
        return cls(path=json_dict.get("Path"), chunking_options=options)

    def to_json(self) -> Dict[str, Any]:
        """Return the dictionary form; unset chunking options are emitted as ``None``."""
        payload: Dict[str, Any] = {"Path": self.path}
        options = self.chunking_options
        payload["ChunkingOptions"] = options.to_json() if options else None
        return payload
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Dict, Any, Optional
2+
3+
from ravendb.documents.operations.ai.chunking_options import ChunkingOptions, ChunkingMethod
4+
5+
6+
# todo: EmbeddingsGenerationConfiguration
7+
class EmbeddingsTransformation:
    """A script plus the chunking options that accompany it.

    Converts to and from a dictionary keyed by ``"Script"`` and
    ``"ChunkingOptions"``.
    """

    # Not referenced inside this class; presumably the function name that
    # scripts call to generate embeddings — confirm against callers.
    GENERATE_EMBEDDINGS_FUNCTION_NAME = "embeddings.generate"

    def __init__(
        self,
        script: Optional[str] = None,
        chunking_options: Optional[ChunkingOptions] = None,
    ):
        """Store the script and chunking options.

        When ``chunking_options`` is falsy (e.g. ``None``), a default of
        ``PLAIN_TEXT_SPLIT`` with 256 tokens per chunk is used instead.
        """
        self.script = script
        if not chunking_options:
            chunking_options = ChunkingOptions(
                chunking_method=ChunkingMethod.PLAIN_TEXT_SPLIT,
                max_tokens_per_chunk=256,
            )
        self.chunking_options = chunking_options

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> "EmbeddingsTransformation":
        """Build an instance from its dictionary form.

        A missing or falsy ``"ChunkingOptions"`` entry is passed to the
        constructor as ``None``, which then applies its own default.
        """
        raw_options = json_dict.get("ChunkingOptions")
        if raw_options:
            options = ChunkingOptions.from_json(raw_options)
        else:
            options = None
        return cls(script=json_dict.get("Script"), chunking_options=options)

    def to_json(self) -> Dict[str, Any]:
        """Return the dictionary form; falsy chunking options are emitted as ``None``."""
        payload: Dict[str, Any] = {"Script": self.script}
        options = self.chunking_options
        payload["ChunkingOptions"] = options.to_json() if options else None
        return payload
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from enum import Enum
2+
from typing import Dict, Any, Optional
3+
4+
from ravendb.documents.operations.ai.abstract_ai_settings import AbstractAiSettings
5+
6+
7+
class VertexAIVersion(Enum):
    """Version identifiers accepted by ``VertexSettings``.

    Each member's value is the string that ``VertexSettings`` writes under
    (and parses from) the ``"AiVersion"`` JSON key.
    """

    V1 = "V1"
    # Note the underscore and mixed case in the serialized form.
    V1_BETA = "V1_Beta"
10+
11+
12+
class VertexSettings(AbstractAiSettings):
    """Vertex settings: model, Google credentials JSON, location and AI version.

    Converts to and from a dictionary keyed by ``"Model"``,
    ``"GoogleCredentialsJson"``, ``"AiVersion"``, ``"Location"`` and
    ``"EmbeddingsMaxConcurrentBatches"``.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        google_credentials_json: Optional[str] = None,
        location: Optional[str] = None,
        ai_version: Optional[VertexAIVersion] = None,
    ):
        """Store the given values as attributes; no validation is performed.

        Args:
            model: Model name.
            google_credentials_json: Google credentials, as a JSON string.
            location: Location string.
            ai_version: Version to use, or ``None`` when unset.
        """
        super().__init__()
        self.model = model
        self.google_credentials_json = google_credentials_json
        self.location = location
        self.ai_version = ai_version

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> "VertexSettings":
        """Build an instance from its dictionary form.

        A missing or falsy ``"AiVersion"`` entry yields ``None``. When
        ``"EmbeddingsMaxConcurrentBatches"`` is present and not ``None`` it is
        restored onto the instance, so ``from_json(x.to_json())`` keeps it.

        Raises:
            ValueError: If ``"AiVersion"`` holds a string that is not a
                ``VertexAIVersion`` value.
        """
        raw_version = json_dict.get("AiVersion")
        settings = cls(
            model=json_dict.get("Model"),
            google_credentials_json=json_dict.get("GoogleCredentialsJson"),
            location=json_dict.get("Location"),
            ai_version=VertexAIVersion(raw_version) if raw_version else None,
        )

        # to_json() emits this key, but the constructor takes no matching
        # argument; without this assignment a round trip silently drops it.
        # NOTE(review): assumes the attribute is a plain instance attribute
        # initialised by AbstractAiSettings.__init__ — confirm.
        max_batches = json_dict.get("EmbeddingsMaxConcurrentBatches")
        if max_batches is not None:
            settings.embeddings_max_concurrent_batches = max_batches
        return settings

    def to_json(self) -> Dict[str, Any]:
        """Return the dictionary form; an unset AI version is emitted as ``None``."""
        return {
            "Model": self.model,
            "GoogleCredentialsJson": self.google_credentials_json,
            "AiVersion": self.ai_version.value if self.ai_version else None,
            "Location": self.location,
            "EmbeddingsMaxConcurrentBatches": self.embeddings_max_concurrent_batches,
        }

0 commit comments

Comments
 (0)