|
65 | 65 | "cell_type": "markdown", |
66 | 66 | "metadata": {}, |
67 | 67 | "source": [ |
68 | | - "## Initializing and using ``SemanticCache``\n", |
| 68 | + "## Initializing ``SemanticCache``\n", |
69 | 69 | "\n", |
70 | 70 | "``SemanticCache`` will automatically create an index within Redis upon initialization for the semantic cache content." |
71 | 71 | ] |
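The notebook's import and test prompt live in cells elided from this diff. For context, a minimal sketch of that setup — the `SemanticCache` import path is an assumption and has moved between redisvl versions, and the prompt string is recoverable from the cached entry shown in the outputs further down:

```python
# Sketch of the elided setup cells (import path differs across redisvl versions)
from redisvl.extensions.llmcache import SemanticCache

# The test prompt used throughout this notebook
question = "What is the capital of France?"
```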
|
80 | 80 | "\n", |
81 | 81 | "llmcache = SemanticCache(\n", |
82 | 82 | " name=\"llmcache\", # underlying search index name\n", |
83 | | - " prefix=\"llmcache\", # redis key prefix\n", |
| 83 | + " prefix=\"llmcache\", # redis key prefix for hash entries\n", |
84 | 84 | " redis_url=\"redis://localhost:6379\", # redis connection url string\n", |
85 | | - " distance_threshold=0.1 # semantic distance threshold\n", |
| 85 | + " distance_threshold=0.1 # semantic cache distance threshold\n", |
86 | 86 | ")" |
87 | 87 | ] |
88 | 88 | }, |
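For reference, `distance_threshold` is the maximum vector distance at which a cached prompt still counts as a hit; smaller values demand closer paraphrases. A rough sketch of the rule — not redisvl source, just the semantics, using distances that actually appear in the outputs below:

```python
# Hit rule in miniature: an entry is returned only when the embedding
# distance between the new prompt and a cached prompt is small enough.
def is_hit(distance: float, threshold: float = 0.1) -> bool:
    return distance <= threshold

assert is_hit(8.34465026855e-07)   # near-exact match: a hit at the default 0.1
assert not is_hit(0.273138523102)  # a miss until the threshold is raised later on
```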
|
119 | 119 | "!rvl index info -i llmcache" |
120 | 120 | ] |
121 | 121 | }, |
| 122 | + { |
| 123 | + "cell_type": "markdown", |
| 124 | + "metadata": {}, |
| 125 | + "source": [ |
| 126 | + "## Basic Cache Usage" |
| 127 | + ] |
| 128 | + }, |
122 | 129 | { |
123 | 130 | "cell_type": "code", |
124 | 131 | "execution_count": 5, |
|
134 | 141 | "metadata": {}, |
135 | 142 | "outputs": [ |
136 | 143 | { |
137 | | - "data": { |
138 | | - "text/plain": [ |
139 | | - "[]" |
140 | | - ] |
141 | | - }, |
142 | | - "execution_count": 6, |
143 | | - "metadata": {}, |
144 | | - "output_type": "execute_result" |
| 144 | + "name": "stdout", |
| 145 | + "output_type": "stream", |
| 146 | + "text": [ |
| 147 | + "Empty cache\n" |
| 148 | + ] |
145 | 149 | } |
146 | 150 | ], |
147 | 151 | "source": [ |
148 | | - "# Check the cache -- should be empty\n", |
149 | | - "llmcache.check(prompt=question)" |
| 152 | + "# Check the semantic cache -- should be empty\n", |
| 153 | + "if response := llmcache.check(prompt=question):\n", |
| 154 | + " print(response)\n", |
| 155 | + "else:\n", |
| 156 | + " print(\"Empty cache\")" |
150 | 157 | ] |
151 | 158 | }, |
152 | 159 | { |
153 | | - "cell_type": "code", |
154 | | - "execution_count": 7, |
| 160 | + "cell_type": "markdown", |
155 | 161 | "metadata": {}, |
156 | | - "outputs": [], |
157 | 162 | "source": [ |
158 | | - "# Cache the question and answer\n", |
159 | | - "llmcache.store(prompt=question, response=\"Paris\")" |
| 163 | + "Our initial cache check should be empty since we have not yet stored anything in the cache. Below, we store the `question`,\n", |
| 164 | + "the proper `response`, and any arbitrary `metadata` (as a Python dictionary) in the cache." |
160 | 165 | ] |
161 | 166 | }, |
162 | 167 | { |
163 | 168 | "cell_type": "code", |
164 | | - "execution_count": 8, |
| 169 | + "execution_count": 7, |
165 | 170 | "metadata": {}, |
166 | | - "outputs": [ |
167 | | - { |
168 | | - "data": { |
169 | | - "text/plain": [ |
170 | | - "[{'response': 'Paris', 'vector_distance': '8.34465026855e-07'}]" |
171 | | - ] |
172 | | - }, |
173 | | - "execution_count": 8, |
174 | | - "metadata": {}, |
175 | | - "output_type": "execute_result" |
176 | | - } |
177 | | - ], |
| 171 | + "outputs": [], |
178 | 172 | "source": [ |
179 | | - "# Check the cache again to see if new answer is there\n", |
180 | | - "llmcache.check(prompt=question)" |
| 173 | + "# Cache the question, answer, and arbitrary metadata\n", |
| 174 | + "llmcache.store(\n", |
| 175 | + " prompt=question,\n", |
| 176 | + " response=\"Paris\",\n", |
| 177 | + " metadata={\"city\": \"Paris\", \"country\": \"france\"}\n", |
| 178 | + ")" |
181 | 179 | ] |
182 | 180 | }, |
183 | 181 | { |
184 | 182 | "cell_type": "code", |
185 | | - "execution_count": 9, |
| 183 | + "execution_count": 8, |
186 | 184 | "metadata": {}, |
187 | 185 | "outputs": [ |
188 | 186 | { |
189 | | - "data": { |
190 | | - "text/plain": [ |
191 | | - "[{'response': 'Paris',\n", |
192 | | - " 'prompt': 'What is the capital of France?',\n", |
193 | | - " 'vector_distance': '8.34465026855e-07'}]" |
194 | | - ] |
195 | | - }, |
196 | | - "execution_count": 9, |
197 | | - "metadata": {}, |
198 | | - "output_type": "execute_result" |
| 187 | + "name": "stdout", |
| 188 | + "output_type": "stream", |
| 189 | + "text": [ |
| 190 | + "[{'id': 'llmcache:115049a298532be2f181edb03f766770c0db84c22aff39003fec340deaec7545', 'vector_distance': '8.34465026855e-07', 'prompt': 'What is the capital of France?', 'response': 'Paris', 'metadata': {'city': 'Paris', 'country': 'france'}}]\n" |
| 191 | + ] |
199 | 192 | } |
200 | 193 | ], |
201 | 194 | "source": [ |
202 | | - "# Update the return fields to gather other kinds of information about the cached entity\n", |
203 | | - "llmcache.check(prompt=question, return_fields=[\"response\", \"prompt\"])" |
| 195 | + "# Check the cache again\n", |
| 196 | + "if response := llmcache.check(prompt=question, return_fields=[\"prompt\", \"response\", \"metadata\"]):\n", |
| 197 | + " print(response)\n", |
| 198 | + "else:\n", |
| 199 | + " print(\"Empty cache\")" |
204 | 200 | ] |
205 | 201 | }, |
206 | 202 | { |
207 | 203 | "cell_type": "code", |
208 | | - "execution_count": 10, |
| 204 | + "execution_count": 9, |
209 | 205 | "metadata": {}, |
210 | 206 | "outputs": [ |
211 | 207 | { |
212 | 208 | "data": { |
213 | 209 | "text/plain": [ |
214 | | - "[{'response': 'Paris', 'vector_distance': '0.0988066792488'}]" |
| 210 | + "'Paris'" |
215 | 211 | ] |
216 | 212 | }, |
217 | | - "execution_count": 10, |
| 213 | + "execution_count": 9, |
218 | 214 | "metadata": {}, |
219 | 215 | "output_type": "execute_result" |
220 | 216 | } |
221 | 217 | ], |
222 | 218 | "source": [ |
223 | 219 | "# Check for a semantically similar result\n", |
224 | | - "llmcache.check(prompt=\"What actually is the capital of France?\")" |
| 220 | + "question = \"What actually is the capital of France?\"\n", |
| 221 | + "llmcache.check(prompt=question)[0]['response']" |
225 | 222 | ] |
226 | 223 | }, |
227 | 224 | { |
228 | 225 | "cell_type": "code", |
229 | | - "execution_count": 11, |
| 226 | + "execution_count": 10, |
230 | 227 | "metadata": {}, |
231 | 228 | "outputs": [], |
232 | 229 | "source": [ |
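The body of this cell is elided from the diff. Given the comment two cells down ("slips just under our new threshold") and the old output distance of ~0.273, it presumably raises the cache's distance threshold along these lines — the 0.3 value is an assumption:

```python
# Assumed content of the elided cell: widen the semantic distance
# threshold from the initial 0.1 so looser paraphrases still hit
llmcache.set_threshold(0.3)
```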
|
236 | 233 | }, |
237 | 234 | { |
238 | 235 | "cell_type": "code", |
239 | | - "execution_count": 12, |
| 236 | + "execution_count": 11, |
240 | 237 | "metadata": {}, |
241 | 238 | "outputs": [ |
242 | 239 | { |
243 | 240 | "data": { |
244 | 241 | "text/plain": [ |
245 | | - "[{'response': 'Paris', 'vector_distance': '0.273138523102'}]" |
| 242 | + "'Paris'" |
246 | 243 | ] |
247 | 244 | }, |
248 | | - "execution_count": 12, |
| 245 | + "execution_count": 11, |
249 | 246 | "metadata": {}, |
250 | 247 | "output_type": "execute_result" |
251 | 248 | } |
252 | 249 | ], |
253 | 250 | "source": [ |
254 | 251 | "# Really try to trick it by asking around the point\n", |
255 | 252 | "# But it still slips in just under our new threshold\n", |
256 | | - "llmcache.check(\n", |
257 | | - " prompt=\"What is the capital city of the country in Europe that also has a city named Nice?\"\n", |
258 | | - ")" |
259 | | - ] |
260 | | - }, |
261 | | - { |
262 | | - "cell_type": "code", |
263 | | - "execution_count": 13, |
264 | | - "metadata": {}, |
265 | | - "outputs": [], |
266 | | - "source": [ |
267 | | - "# Invalidate the cache completely by clearing it out\n", |
268 | | - "llmcache.clear()" |
| 253 | + "question = \"What is the capital city of the country in Europe that also has a city named Nice?\"\n", |
| 254 | + "llmcache.check(prompt=question)[0]['response']" |
269 | 255 | ] |
270 | 256 | }, |
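Together, `check` and `store` support the usual cache-aside flow; note too that the bare `[0]['response']` subscripts above assume a hit and would raise `IndexError` on a miss. A minimal sketch, with `ask_llm` as a hypothetical stand-in for the notebook's OpenAI call:

```python
def answer(prompt: str) -> str:
    # Serve from the semantic cache when a close-enough prompt is stored
    if hits := llmcache.check(prompt=prompt):
        return hits[0]["response"]
    # Otherwise fall back to the model and cache the result for next time
    response = ask_llm(prompt)  # hypothetical helper, not part of redisvl
    llmcache.store(prompt=prompt, response=response)
    return response
```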
271 | 257 | { |
272 | 258 | "cell_type": "code", |
273 | | - "execution_count": 14, |
| 259 | + "execution_count": 12, |
274 | 260 | "metadata": {}, |
275 | 261 | "outputs": [ |
276 | 262 | { |
|
279 | 265 | "[]" |
280 | 266 | ] |
281 | 267 | }, |
282 | | - "execution_count": 14, |
| 268 | + "execution_count": 12, |
283 | 269 | "metadata": {}, |
284 | 270 | "output_type": "execute_result" |
285 | 271 | } |
286 | 272 | ], |
287 | 273 | "source": [ |
| 274 | + "# Invalidate the cache completely by clearing it out\n", |
| 275 | + "llmcache.clear()\n", |
| 276 | + "\n", |
288 | 277 | "# should be empty now\n", |
289 | 278 | "llmcache.check(prompt=question)" |
290 | 279 | ] |
|
300 | 289 | }, |
301 | 290 | { |
302 | 291 | "cell_type": "code", |
303 | | - "execution_count": 15, |
| 292 | + "execution_count": 13, |
304 | 293 | "metadata": {}, |
305 | 294 | "outputs": [], |
306 | 295 | "source": [ |
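The source elided here presumably defines the OpenAI helper that the timing cells below exercise. A hypothetical sketch using the modern `openai>=1.0` client — the notebook's actual code may use an older API and a different model:

```python
from openai import OpenAI  # assumes the modern openai client

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def answer_question(prompt: str) -> str:
    # One-shot chat completion; the model choice is an assumption
    chat = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return chat.choices[0].message.content
```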
|
327 | 316 | }, |
328 | 317 | { |
329 | 318 | "cell_type": "code", |
330 | | - "execution_count": 19, |
| 319 | + "execution_count": 14, |
331 | 320 | "metadata": {}, |
332 | 321 | "outputs": [ |
333 | 322 | { |
334 | 323 | "name": "stdout", |
335 | 324 | "output_type": "stream", |
336 | 325 | "text": [ |
337 | | - "Without caching, a call to openAI to answer this simple question took 0.5083951950073242 seconds.\n" |
| 326 | + "Without caching, a call to openAI to answer this simple question took 0.5017588138580322 seconds.\n" |
338 | 327 | ] |
339 | 328 | } |
340 | 329 | ], |
|
349 | 338 | }, |
350 | 339 | { |
351 | 340 | "cell_type": "code", |
352 | | - "execution_count": 20, |
| 341 | + "execution_count": 15, |
353 | 342 | "metadata": {}, |
354 | 343 | "outputs": [], |
355 | 344 | "source": [ |
|
358 | 347 | }, |
359 | 348 | { |
360 | 349 | "cell_type": "code", |
361 | | - "execution_count": 34, |
| 350 | + "execution_count": 16, |
362 | 351 | "metadata": {}, |
363 | 352 | "outputs": [ |
364 | 353 | { |
365 | 354 | "name": "stdout", |
366 | 355 | "output_type": "stream", |
367 | 356 | "text": [ |
368 | | - "Time Taken with cache enabled: 0.08954691886901855\n", |
369 | | - "Percentage of time saved: 82.39%\n" |
| 357 | + "Time Taken with cache enabled: 0.327639102935791\n", |
| 358 | + "Percentage of time saved: 34.7%\n" |
370 | 359 | ] |
371 | 360 | } |
372 | 361 | ], |
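The timing cells' sources are elided from this diff; the comparison presumably follows the standard stopwatch pattern, roughly as sketched below (the hard-coded `llm_time` is the uncached duration printed in the earlier cell):

```python
import time

# Time the cached path: answered straight from Redis, no LLM round-trip
start = time.time()
cached = llmcache.check(prompt=question)
cache_time = time.time() - start

llm_time = 0.5017588138580322  # uncached duration from the earlier cell
print(f"Time Taken with cache enabled: {cache_time}")
print(f"Percentage of time saved: {round((llm_time - cache_time) / llm_time * 100, 2)}%")
```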
|
380 | 369 | }, |
381 | 370 | { |
382 | 371 | "cell_type": "code", |
383 | | - "execution_count": 35, |
| 372 | + "execution_count": 17, |
384 | 373 | "metadata": {}, |
385 | 374 | "outputs": [ |
386 | 375 | { |
|
398 | 387 | "│ num_records │ 16 │\n", |
399 | 388 | "│ percent_indexed │ 1 │\n", |
400 | 389 | "│ hash_indexing_failures │ 0 │\n", |
401 | | - "│ number_of_uses │ 26 │\n", |
| 390 | + "│ number_of_uses │ 9 │\n", |
402 | 391 | "│ bytes_per_record_avg │ 5.25 │\n", |
403 | 392 | "│ doc_table_size_mb │ 0.000134468 │\n", |
404 | 393 | "│ inverted_sz_mb │ 8.01086e-05 │\n", |
|
408 | 397 | "│ offsets_per_term_avg │ 0.875 │\n", |
409 | 398 | "│ records_per_doc_avg │ 16 │\n", |
410 | 399 | "│ sortable_values_size_mb │ 0 │\n", |
411 | | - "│ total_indexing_time │ 0.76 │\n", |
| 400 | + "│ total_indexing_time │ 0.548 │\n", |
412 | 401 | "│ total_inverted_index_blocks │ 7 │\n", |
413 | 402 | "│ vector_index_sz_mb │ 3.0161 │\n", |
414 | 403 | "╰─────────────────────────────┴─────────────╯\n" |
|
422 | 411 | }, |
423 | 412 | { |
424 | 413 | "cell_type": "code", |
425 | | - "execution_count": 20, |
| 414 | + "execution_count": 18, |
426 | 415 | "metadata": {}, |
427 | 416 | "outputs": [], |
428 | 417 | "source": [ |
429 | | - "# Clear the cache\n", |
430 | | - "llmcache.clear()\n", |
431 | | - "\n", |
432 | | - "# Remove the underlying index\n", |
433 | | - "llmcache._index.delete(drop=True)" |
| 418 | + "# Clear the cache AND delete the underlying index\n", |
| 419 | + "llmcache.delete()" |
434 | 420 | ] |
435 | 421 | } |
436 | 422 | ], |
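The teardown change above is worth a note: `clear()` (used earlier) only flushes the cached entries, which is why the old cell paired it with `_index.delete(drop=True)`; the new `delete()` call removes the entries and drops the underlying search index in one step.

```python
# Teardown options, least to most destructive
llmcache.clear()   # flush cached entries; keep the index for reuse
llmcache.delete()  # flush entries AND drop the underlying search index
```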
|