Skip to content

Commit bf88687

Browse files
tylerhutcherson and Spartee
authored and committed
Improve SemanticCache for metadata persistence (#97)
This PR improves the ability to store optional metadata alongside entries in the `SemanticCache`. It also improves documentation, docstrings, comments, and user guide examples for the cache.
1 parent 18489da commit bf88687

File tree

6 files changed

+255
-232
lines changed

6 files changed

+255
-232
lines changed

docs/api/cache.rst

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,10 @@ SemanticCache
1515
SemanticCache.__init__
1616
SemanticCache.check
1717
SemanticCache.store
18-
SemanticCache.set_threshold
18+
SemanticCache.clear
19+
SemanticCache.delete
1920
SemanticCache.distance_threshold
20-
SemanticCache.index
21+
SemanticCache.set_threshold
2122
SemanticCache.ttl
2223
SemanticCache.set_ttl
2324

docs/user_guide/llmcache_03.ipynb

Lines changed: 71 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@
6565
"cell_type": "markdown",
6666
"metadata": {},
6767
"source": [
68-
"## Initializing and using ``SemanticCache``\n",
68+
"## Initializing ``SemanticCache``\n",
6969
"\n",
7070
"``SemanticCache`` will automatically create an index within Redis upon initialization for the semantic cache content."
7171
]
@@ -80,9 +80,9 @@
8080
"\n",
8181
"llmcache = SemanticCache(\n",
8282
" name=\"llmcache\", # underlying search index name\n",
83-
" prefix=\"llmcache\", # redis key prefix\n",
83+
" prefix=\"llmcache\", # redis key prefix for hash entries\n",
8484
" redis_url=\"redis://localhost:6379\", # redis connection url string\n",
85-
" distance_threshold=0.1 # semantic distance threshold\n",
85+
" distance_threshold=0.1 # semantic cache distance threshold\n",
8686
")"
8787
]
8888
},
@@ -119,6 +119,13 @@
119119
"!rvl index info -i llmcache"
120120
]
121121
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"## Basic Cache Usage"
127+
]
128+
},
122129
{
123130
"cell_type": "code",
124131
"execution_count": 5,
@@ -134,99 +141,89 @@
134141
"metadata": {},
135142
"outputs": [
136143
{
137-
"data": {
138-
"text/plain": [
139-
"[]"
140-
]
141-
},
142-
"execution_count": 6,
143-
"metadata": {},
144-
"output_type": "execute_result"
144+
"name": "stdout",
145+
"output_type": "stream",
146+
"text": [
147+
"Empty cache\n"
148+
]
145149
}
146150
],
147151
"source": [
148-
"# Check the cache -- should be empty\n",
149-
"llmcache.check(prompt=question)"
152+
"# Check the semantic cache -- should be empty\n",
153+
"if response := llmcache.check(prompt=question):\n",
154+
" print(response)\n",
155+
"else:\n",
156+
" print(\"Empty cache\")"
150157
]
151158
},
152159
{
153-
"cell_type": "code",
154-
"execution_count": 7,
160+
"cell_type": "markdown",
155161
"metadata": {},
156-
"outputs": [],
157162
"source": [
158-
"# Cache the question and answer\n",
159-
"llmcache.store(prompt=question, response=\"Paris\")"
163+
"Our initial cache check should be empty since we have not yet stored anything in the cache. Below, store the `question`,\n",
164+
"proper `response`, and any arbitrary `metadata` (as a python dictionary object) in the cache."
160165
]
161166
},
162167
{
163168
"cell_type": "code",
164-
"execution_count": 8,
169+
"execution_count": 7,
165170
"metadata": {},
166-
"outputs": [
167-
{
168-
"data": {
169-
"text/plain": [
170-
"[{'response': 'Paris', 'vector_distance': '8.34465026855e-07'}]"
171-
]
172-
},
173-
"execution_count": 8,
174-
"metadata": {},
175-
"output_type": "execute_result"
176-
}
177-
],
171+
"outputs": [],
178172
"source": [
179-
"# Check the cache again to see if new answer is there\n",
180-
"llmcache.check(prompt=question)"
173+
"# Cache the question, answer, and arbitrary metadata\n",
174+
"llmcache.store(\n",
175+
" prompt=question,\n",
176+
" response=\"Paris\",\n",
177+
" metadata={\"city\": \"Paris\", \"country\": \"france\"}\n",
178+
")"
181179
]
182180
},
183181
{
184182
"cell_type": "code",
185-
"execution_count": 9,
183+
"execution_count": 8,
186184
"metadata": {},
187185
"outputs": [
188186
{
189-
"data": {
190-
"text/plain": [
191-
"[{'response': 'Paris',\n",
192-
" 'prompt': 'What is the capital of France?',\n",
193-
" 'vector_distance': '8.34465026855e-07'}]"
194-
]
195-
},
196-
"execution_count": 9,
197-
"metadata": {},
198-
"output_type": "execute_result"
187+
"name": "stdout",
188+
"output_type": "stream",
189+
"text": [
190+
"[{'id': 'llmcache:115049a298532be2f181edb03f766770c0db84c22aff39003fec340deaec7545', 'vector_distance': '8.34465026855e-07', 'prompt': 'What is the capital of France?', 'response': 'Paris', 'metadata': {'city': 'Paris', 'country': 'france'}}]\n"
191+
]
199192
}
200193
],
201194
"source": [
202-
"# Update the return fields to gather other kinds of information about the cached entity\n",
203-
"llmcache.check(prompt=question, return_fields=[\"response\", \"prompt\"])"
195+
"# Check the cache again\n",
196+
"if response := llmcache.check(prompt=question, return_fields=[\"prompt\", \"response\", \"metadata\"]):\n",
197+
" print(response)\n",
198+
"else:\n",
199+
" print(\"Empty cache\")"
204200
]
205201
},
206202
{
207203
"cell_type": "code",
208-
"execution_count": 10,
204+
"execution_count": 9,
209205
"metadata": {},
210206
"outputs": [
211207
{
212208
"data": {
213209
"text/plain": [
214-
"[{'response': 'Paris', 'vector_distance': '0.0988066792488'}]"
210+
"'Paris'"
215211
]
216212
},
217-
"execution_count": 10,
213+
"execution_count": 9,
218214
"metadata": {},
219215
"output_type": "execute_result"
220216
}
221217
],
222218
"source": [
223219
"# Check for a semantically similar result\n",
224-
"llmcache.check(prompt=\"What actually is the capital of France?\")"
220+
"question = \"What actually is the capital of France?\"\n",
221+
"llmcache.check(prompt=question)[0]['response']"
225222
]
226223
},
227224
{
228225
"cell_type": "code",
229-
"execution_count": 11,
226+
"execution_count": 10,
230227
"metadata": {},
231228
"outputs": [],
232229
"source": [
@@ -236,41 +233,30 @@
236233
},
237234
{
238235
"cell_type": "code",
239-
"execution_count": 12,
236+
"execution_count": 11,
240237
"metadata": {},
241238
"outputs": [
242239
{
243240
"data": {
244241
"text/plain": [
245-
"[{'response': 'Paris', 'vector_distance': '0.273138523102'}]"
242+
"'Paris'"
246243
]
247244
},
248-
"execution_count": 12,
245+
"execution_count": 11,
249246
"metadata": {},
250247
"output_type": "execute_result"
251248
}
252249
],
253250
"source": [
254251
"# Really try to trick it by asking around the point\n",
255252
"# But is able to slip just under our new threshold\n",
256-
"llmcache.check(\n",
257-
" prompt=\"What is the capital city of the country in Europe that also has a city named Nice?\"\n",
258-
")"
259-
]
260-
},
261-
{
262-
"cell_type": "code",
263-
"execution_count": 13,
264-
"metadata": {},
265-
"outputs": [],
266-
"source": [
267-
"# Invalidate the cache completely by clearing it out\n",
268-
"llmcache.clear()"
253+
"question = \"What is the capital city of the country in Europe that also has a city named Nice?\"\n",
254+
"llmcache.check(prompt=question)[0]['response']"
269255
]
270256
},
271257
{
272258
"cell_type": "code",
273-
"execution_count": 14,
259+
"execution_count": 12,
274260
"metadata": {},
275261
"outputs": [
276262
{
@@ -279,12 +265,15 @@
279265
"[]"
280266
]
281267
},
282-
"execution_count": 14,
268+
"execution_count": 12,
283269
"metadata": {},
284270
"output_type": "execute_result"
285271
}
286272
],
287273
"source": [
274+
"# Invalidate the cache completely by clearing it out\n",
275+
"llmcache.clear()\n",
276+
"\n",
288277
"# should be empty now\n",
289278
"llmcache.check(prompt=question)"
290279
]
@@ -300,7 +289,7 @@
300289
},
301290
{
302291
"cell_type": "code",
303-
"execution_count": 15,
292+
"execution_count": 13,
304293
"metadata": {},
305294
"outputs": [],
306295
"source": [
@@ -327,14 +316,14 @@
327316
},
328317
{
329318
"cell_type": "code",
330-
"execution_count": 19,
319+
"execution_count": 14,
331320
"metadata": {},
332321
"outputs": [
333322
{
334323
"name": "stdout",
335324
"output_type": "stream",
336325
"text": [
337-
"Without caching, a call to openAI to answer this simple question took 0.5083951950073242 seconds.\n"
326+
"Without caching, a call to openAI to answer this simple question took 0.5017588138580322 seconds.\n"
338327
]
339328
}
340329
],
@@ -349,7 +338,7 @@
349338
},
350339
{
351340
"cell_type": "code",
352-
"execution_count": 20,
341+
"execution_count": 15,
353342
"metadata": {},
354343
"outputs": [],
355344
"source": [
@@ -358,15 +347,15 @@
358347
},
359348
{
360349
"cell_type": "code",
361-
"execution_count": 34,
350+
"execution_count": 16,
362351
"metadata": {},
363352
"outputs": [
364353
{
365354
"name": "stdout",
366355
"output_type": "stream",
367356
"text": [
368-
"Time Taken with cache enabled: 0.08954691886901855\n",
369-
"Percentage of time saved: 82.39%\n"
357+
"Time Taken with cache enabled: 0.327639102935791\n",
358+
"Percentage of time saved: 34.7%\n"
370359
]
371360
}
372361
],
@@ -380,7 +369,7 @@
380369
},
381370
{
382371
"cell_type": "code",
383-
"execution_count": 35,
372+
"execution_count": 17,
384373
"metadata": {},
385374
"outputs": [
386375
{
@@ -398,7 +387,7 @@
398387
"│ num_records │ 16 │\n",
399388
"│ percent_indexed │ 1 │\n",
400389
"│ hash_indexing_failures │ 0 │\n",
401-
"│ number_of_uses │ 26\n",
390+
"│ number_of_uses │ 9 \n",
402391
"│ bytes_per_record_avg │ 5.25 │\n",
403392
"│ doc_table_size_mb │ 0.000134468 │\n",
404393
"│ inverted_sz_mb │ 8.01086e-05 │\n",
@@ -408,7 +397,7 @@
408397
"│ offsets_per_term_avg │ 0.875 │\n",
409398
"│ records_per_doc_avg │ 16 │\n",
410399
"│ sortable_values_size_mb │ 0 │\n",
411-
"│ total_indexing_time │ 0.76 \n",
400+
"│ total_indexing_time │ 0.548\n",
412401
"│ total_inverted_index_blocks │ 7 │\n",
413402
"│ vector_index_sz_mb │ 3.0161 │\n",
414403
"╰─────────────────────────────┴─────────────╯\n"
@@ -422,15 +411,12 @@
422411
},
423412
{
424413
"cell_type": "code",
425-
"execution_count": 20,
414+
"execution_count": 18,
426415
"metadata": {},
427416
"outputs": [],
428417
"source": [
429-
"# Clear the cache\n",
430-
"llmcache.clear()\n",
431-
"\n",
432-
"# Remove the underlying index\n",
433-
"llmcache._index.delete(drop=True)"
418+
"# Clear the cache AND delete the underlying index\n",
419+
"llmcache.delete()"
434420
]
435421
}
436422
],

0 commit comments

Comments (0)