Skip to content

Commit dbdb2d6

Browse files
tylerhutcherson authored and Spartee committed
Improve vectorizer init and descriptions (#92)
Addresses https://github.com/RedisVentures/redisvl/issues/87 and improves docstrings and exception details.
1 parent 25ef582 commit dbdb2d6

File tree

9 files changed

+390
-241
lines changed

9 files changed

+390
-241
lines changed

docs/user_guide/vectorizers_04.ipynb

Lines changed: 87 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -88,16 +88,16 @@
8888
{
8989
"data": {
9090
"text/plain": [
91-
"[-0.001046799123287201,\n",
92-
" -0.0031105349771678448,\n",
93-
" 0.0024228920228779316,\n",
94-
" -0.004480978474020958,\n",
95-
" -0.010343699716031551,\n",
96-
" 0.012758520431816578,\n",
97-
" -0.00535263866186142,\n",
98-
" -0.003002384677529335,\n",
99-
" -0.007115328684449196,\n",
100-
" -0.03378167003393173]"
91+
"[-0.001025049015879631,\n",
92+
" -0.0030993607360869646,\n",
93+
" 0.0024536605924367905,\n",
94+
" -0.004484387580305338,\n",
95+
" -0.010331203229725361,\n",
96+
" 0.012700922787189484,\n",
97+
" -0.005368996877223253,\n",
98+
" -0.0029411641880869865,\n",
99+
" -0.0070833307690918446,\n",
100+
" -0.03386051580309868]"
101101
]
102102
},
103103
"execution_count": 3,
@@ -127,16 +127,16 @@
127127
{
128128
"data": {
129129
"text/plain": [
130-
"[-0.017399806529283524,\n",
131-
" -2.3427608653037169e-07,\n",
132-
" 0.0014656063867732882,\n",
133-
" -0.02562308870255947,\n",
134-
" -0.019890939816832542,\n",
135-
" 0.016027139499783516,\n",
136-
" -0.0036763285752385855,\n",
137-
" 0.0008253469131886959,\n",
138-
" 0.006609130185097456,\n",
139-
" -0.025165533646941185]"
130+
"[-0.01747742109000683,\n",
131+
" -5.228330701356754e-05,\n",
132+
" 0.0013870716793462634,\n",
133+
" -0.025637786835432053,\n",
134+
" -0.01985435001552105,\n",
135+
" 0.016117358580231667,\n",
136+
" -0.0037306349258869886,\n",
137+
" 0.0008945261361077428,\n",
138+
" 0.006577865686267614,\n",
139+
" -0.025091219693422318]"
140140
]
141141
},
142142
"execution_count": 4,
@@ -190,9 +190,29 @@
190190
},
191191
{
192192
"cell_type": "code",
193-
"execution_count": null,
193+
"execution_count": 6,
194194
"metadata": {},
195-
"outputs": [],
195+
"outputs": [
196+
{
197+
"data": {
198+
"text/plain": [
199+
"[0.00037810884532518685,\n",
200+
" -0.05080341175198555,\n",
201+
" -0.03514723479747772,\n",
202+
" -0.02325104922056198,\n",
203+
" -0.044158220291137695,\n",
204+
" 0.020487844944000244,\n",
205+
" 0.0014617963461205363,\n",
206+
" 0.031261757016181946,\n",
207+
" 0.05605152249336243,\n",
208+
" 0.018815357238054276]"
209+
]
210+
},
211+
"execution_count": 6,
212+
"metadata": {},
213+
"output_type": "execute_result"
214+
}
215+
],
196216
"source": [
197217
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
198218
"from redisvl.vectorize.text import HFTextVectorizer\n",
@@ -229,26 +249,49 @@
229249
"pip install \"google-cloud-aiplatform>=1.26\"\n",
230250
"```\n",
231251
"\n",
232-
"1. Then you need to gain access to a [Google Cloud Project](https://cloud.google.com/gcp?hl=en) and provide [access to credentials](https://cloud.google.com/docs/authentication/application-default-credentials). This typically accomplished with the `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to the path of a JSON key file downloaded from your service account on GCP.\n",
233-
"2. Lastly, you need to find your [project ID](https://support.google.com/googleapi/answer/7014113?hl=en) and [geographic region for VertexAI](https://cloud.google.com/vertex-ai/docs/general/locations)."
252+
"1. Then you need to gain access to a [Google Cloud Project](https://cloud.google.com/gcp?hl=en) and provide [access to credentials](https://cloud.google.com/docs/authentication/application-default-credentials). This is accomplished by setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of a JSON key file downloaded from your service account on GCP.\n",
253+
"2. Lastly, you need to find your [project ID](https://support.google.com/googleapi/answer/7014113?hl=en) and [geographic region for VertexAI](https://cloud.google.com/vertex-ai/docs/general/locations).\n",
254+
"\n",
255+
"\n",
256+
"**Make sure the following env vars are set:**\n",
257+
"```\n",
258+
"GOOGLE_APPLICATION_CREDENTIALS=<path to your gcp JSON creds>\n",
259+
"GCP_PROJECT_ID=<your gcp project id>\n",
260+
"GCP_LOCATION=<your gcp geo region for vertex ai>\n",
261+
"```"
234262
]
235263
},
236264
{
237265
"cell_type": "code",
238-
"execution_count": null,
266+
"execution_count": 12,
239267
"metadata": {},
240-
"outputs": [],
268+
"outputs": [
269+
{
270+
"data": {
271+
"text/plain": [
272+
"[0.04373306408524513,\n",
273+
" -0.05040992051362991,\n",
274+
" -0.011946038343012333,\n",
275+
" -0.043528858572244644,\n",
276+
" 0.021510830149054527,\n",
277+
" 0.028604144230484962,\n",
278+
" 0.014770914800465107,\n",
279+
" -0.01610461436212063,\n",
280+
" -0.0036560404114425182,\n",
281+
" 0.013746795244514942]"
282+
]
283+
},
284+
"execution_count": 12,
285+
"metadata": {},
286+
"output_type": "execute_result"
287+
}
288+
],
241289
"source": [
242290
"from redisvl.vectorize.text import VertexAITextVectorizer\n",
243291
"\n",
244292
"\n",
245293
"# create a vectorizer\n",
246-
"vtx = VertexAITextVectorizer(\n",
247-
" api_config={\n",
248-
" \"project_id\": os.environ[\"GCP_PROJECT_ID\"],\n",
249-
" \"location\": os.environ[\"GCP_LOCATION\"]\n",
250-
" }\n",
251-
")\n",
294+
"vtx = VertexAITextVectorizer()\n",
252295
"\n",
253296
"# embed a sentence\n",
254297
"test = vtx.embed(\"This is a test sentence.\")\n",
@@ -287,7 +330,7 @@
287330
},
288331
{
289332
"cell_type": "code",
290-
"execution_count": 8,
333+
"execution_count": 13,
291334
"metadata": {},
292335
"outputs": [],
293336
"source": [
@@ -305,15 +348,15 @@
305348
},
306349
{
307350
"cell_type": "code",
308-
"execution_count": 9,
351+
"execution_count": 14,
309352
"metadata": {},
310353
"outputs": [
311354
{
312355
"name": "stdout",
313356
"output_type": "stream",
314357
"text": [
315-
"\u001b[32m20:13:35\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
316-
"\u001b[32m20:13:35\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. providers\n"
358+
"\u001b[32m22:02:27\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
359+
"\u001b[32m22:02:27\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. providers\n"
317360
]
318361
}
319362
],
@@ -324,7 +367,7 @@
324367
},
325368
{
326369
"cell_type": "code",
327-
"execution_count": 10,
370+
"execution_count": 15,
328371
"metadata": {},
329372
"outputs": [],
330373
"source": [
@@ -340,19 +383,16 @@
340383
},
341384
{
342385
"cell_type": "code",
343-
"execution_count": 11,
386+
"execution_count": 16,
344387
"metadata": {},
345388
"outputs": [
346389
{
347390
"name": "stdout",
348391
"output_type": "stream",
349392
"text": [
350-
"That is a happy dog\n",
351-
"0.160862445831\n",
352-
"That is a happy person\n",
353-
"0.273598074913\n",
354-
"Today is a sunny day\n",
355-
"0.744559526443\n"
393+
"That is a happy dog 0.160862326622\n",
394+
"That is a happy person 0.273598492146\n",
395+
"Today is a sunny day 0.744559407234\n"
356396
]
357397
}
358398
],
@@ -369,10 +409,9 @@
369409
" num_results=3\n",
370410
")\n",
371411
"\n",
372-
"results = index.search(query.query, query_params=query.params)\n",
373-
"for doc in results.docs:\n",
374-
" print(doc.text)\n",
375-
" print(doc.vector_distance)"
412+
"results = index.query(query)\n",
413+
"for doc in results:\n",
414+
" print(doc[\"text\"], doc[\"vector_distance\"])"
376415
]
377416
}
378417
],

redisvl/index.py

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,14 @@ class SearchIndex:
137137
purpose-built methods for interacting with Redis as a vector database.
138138
139139
Example:
140-
>>> from redisvl.index import SearchIndex
141-
>>> index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379")
142-
>>> index.create(overwrite=True)
143-
>>> index.load(data) # data is an iterable of dictionaries
144-
>>>
145-
>>> # Use an async connection
146-
>>> index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379", use_async=True)
147-
>>> await index.acreate(overwrite=True)
148-
>>> await index.aload(data)
140+
from redisvl.index import SearchIndex
141+
index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379")
142+
index.create(overwrite=True)
143+
index.load(data) # data is an iterable of dictionaries
144+
# Use an async connection
145+
index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379", use_async=True)
146+
await index.acreate(overwrite=True)
147+
await index.aload(data)
149148
"""
150149

151150
_STORAGE_MAP = {
@@ -226,9 +225,9 @@ def from_yaml(
226225
args.
227226
228227
Example:
229-
>>> from redisvl.index import SearchIndex
230-
>>> index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379")
231-
>>> index.create(overwrite=True)
228+
from redisvl.index import SearchIndex
229+
index = SearchIndex.from_yaml("schema.yaml", redis_url="redis://localhost:6379")
230+
index.create(overwrite=True)
232231
233232
Returns:
234233
SearchIndex: A RedisVL SearchIndex object.
@@ -248,18 +247,18 @@ def from_dict(
248247
args.
249248
250249
Example:
251-
>>> from redisvl.index import SearchIndex
252-
>>> index = SearchIndex.from_dict({
253-
>>> "index": {
254-
>>> "name": "my-index",
255-
>>> "prefix": "rvl",
256-
>>> "storage_type": "hash",
257-
>>> },
258-
>>> "fields": {
259-
>>> "tag": [{"name": "doc-id"}]
260-
>>> }
261-
>>> }, redis_url="redis://localhost:6379")
262-
>>> index.create(overwrite=True)
250+
from redisvl.index import SearchIndex
251+
index = SearchIndex.from_dict({
252+
"index": {
253+
"name": "my-index",
254+
"prefix": "rvl",
255+
"storage_type": "hash",
256+
},
257+
"fields": {
258+
"tag": [{"name": "doc-id"}]
259+
}
260+
}, redis_url="redis://localhost:6379")
261+
index.create(overwrite=True)
263262
264263
Returns:
265264
SearchIndex: A RedisVL SearchIndex object.
@@ -289,10 +288,10 @@ def connect(
289288
Redis client. Defaults to `False`.
290289
291290
Example:
292-
>>> # standard sync Redis connection
293-
>>> index.connect(redis_url="redis://localhost:6379")
294-
>>> # async Redis connection
295-
>>> index.connect(redis_url="redis://localhost:6379", use_async=True)
291+
# standard sync Redis connection
292+
index.connect(redis_url="redis://localhost:6379")
293+
# async Redis connection
294+
index.connect(redis_url="redis://localhost:6379", use_async=True)
296295
297296
Raises:
298297
redis.exceptions.ConnectionError: If the connection to the Redis
@@ -320,14 +319,13 @@ def set_client(self, client: Union[redis.Redis, aredis.Redis]):
320319
client instance to be used for the connection.
321320
322321
Example:
323-
>>> import redis
324-
>>> r = redis.Redis.from_url("redis://localhost:6379")
325-
>>> index.set_client(r)
326-
>>> # async Redis client
327-
>>> import redis.asyncio as aredis
328-
>>> r = aredis.Redis.from_url("redis://localhost:6379")
329-
>>> index.set_client(r)
330-
322+
import redis
323+
r = redis.Redis.from_url("redis://localhost:6379")
324+
index.set_client(r)
325+
# async Redis client
326+
import redis.asyncio as aredis
327+
r = aredis.Redis.from_url("redis://localhost:6379")
328+
index.set_client(r)
331329
332330
Raises:
333331
TypeError: If the provided client is not valid.
@@ -432,11 +430,11 @@ def load(
432430
of objects.
433431
434432
Example:
435-
>>> data = [{"foo": "bar"}, {"test": "values"}]
436-
>>> def func(record: dict):
437-
>>> record["new"] = "value"
438-
>>> return record
439-
>>> index.load(data, preprocess=func)
433+
data = [{"foo": "bar"}, {"test": "values"}]
434+
def func(record: dict):
435+
record["new"] = "value"
436+
return record
437+
index.load(data, preprocess=func)
440438
"""
441439
self._storage.write(
442440
self._redis_conn.client, # type: ignore
@@ -587,11 +585,11 @@ async def aload(
587585
length of objects.
588586
589587
Example:
590-
>>> data = [{"foo": "bar"}, {"test": "values"}]
591-
>>> async def func(record: dict):
592-
>>> record["new"] = "value"
593-
>>> return record
594-
>>> await index.load(data, preprocess=func)
588+
data = [{"foo": "bar"}, {"test": "values"}]
589+
async def func(record: dict):
590+
record["new"] = "value"
591+
return record
592+
await index.aload(data, preprocess=func)
595593
"""
596594
await self._storage.awrite(
597595
self._redis_conn.client, # type: ignore

0 commit comments

Comments
 (0)