Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions cachy.jsonl

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions lisette/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
'lisette.core.Chat.__init__': ('core.html#chat.__init__', 'lisette/core.py'),
'lisette.core.Chat._call': ('core.html#chat._call', 'lisette/core.py'),
'lisette.core.Chat._prep_msg': ('core.html#chat._prep_msg', 'lisette/core.py'),
'lisette.core.Chat.print_hist': ('core.html#chat.print_hist', 'lisette/core.py'),
'lisette.core.ToolResponse': ('core.html#toolresponse', 'lisette/core.py'),
'lisette.core._add_cache_control': ('core.html#_add_cache_control', 'lisette/core.py'),
'lisette.core._alite_call_func': ('core.html#_alite_call_func', 'lisette/core.py'),
'lisette.core._apply_cache_idxs': ('core.html#_apply_cache_idxs', 'lisette/core.py'),
'lisette.core._bytes2content': ('core.html#_bytes2content', 'lisette/core.py'),
'lisette.core._detect_mime': ('core.html#_detect_mime', 'lisette/core.py'),
'lisette.core._extract_tool': ('core.html#_extract_tool', 'lisette/core.py'),
'lisette.core._has_cache': ('core.html#_has_cache', 'lisette/core.py'),
'lisette.core._has_search': ('core.html#_has_search', 'lisette/core.py'),
Expand All @@ -43,13 +43,8 @@
'lisette/core.py'),
'lisette.core.mk_msg': ('core.html#mk_msg', 'lisette/core.py'),
'lisette.core.mk_msgs': ('core.html#mk_msgs', 'lisette/core.py'),
'lisette.core.mk_tc': ('core.html#mk_tc', 'lisette/core.py'),
'lisette.core.mk_tc_req': ('core.html#mk_tc_req', 'lisette/core.py'),
'lisette.core.mk_tc_result': ('core.html#mk_tc_result', 'lisette/core.py'),
'lisette.core.mk_tc_results': ('core.html#mk_tc_results', 'lisette/core.py'),
'lisette.core.mk_tr_details': ('core.html#mk_tr_details', 'lisette/core.py'),
'lisette.core.patch_litellm': ('core.html#patch_litellm', 'lisette/core.py'),
'lisette.core.random_tool_id': ('core.html#random_tool_id', 'lisette/core.py'),
'lisette.core.remove_cache_ckpts': ('core.html#remove_cache_ckpts', 'lisette/core.py'),
'lisette.core.stream_with_complete': ('core.html#stream_with_complete', 'lisette/core.py')},
'lisette.usage': { 'lisette.usage.LisetteUsageLogger': ('usage.html#lisetteusagelogger', 'lisette/usage.py'),
Expand Down
101 changes: 41 additions & 60 deletions lisette/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
# %% auto 0
__all__ = ['sonn45', 'detls_tag', 're_tools', 'effort', 'patch_litellm', 'remove_cache_ckpts', 'contents', 'mk_msg', 'fmt2hist',
'mk_msgs', 'stream_with_complete', 'lite_mk_func', 'ToolResponse', 'cite_footnote', 'cite_footnotes', 'Chat',
'random_tool_id', 'mk_tc', 'mk_tc_req', 'mk_tc_result', 'mk_tc_results', 'astream_with_complete',
'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']
'astream_with_complete', 'AsyncChat', 'mk_tr_details', 'AsyncStreamFormatter', 'adisplay_stream']

# %% ../nbs/00_core.ipynb
import asyncio, base64, json, litellm, mimetypes, random, string
import asyncio, base64, io, json, litellm, mimetypes, random, string
from typing import Optional
from html import escape
from litellm import (acompletion, completion, stream_chunk_builder, Message,
Expand Down Expand Up @@ -45,15 +44,11 @@ def _repr_markdown_(self: litellm.ModelResponse):
if message.tool_calls:
tool_calls = [f"\n\n🔧 {nested_idx(tc,'function','name')}({nested_idx(tc,'function','arguments')})\n" for tc in message.tool_calls]
content += "\n".join(tool_calls)
for img in getattr(message, 'images', []): content += f"\n\n![generated image]({nested_idx(img, 'image_url', 'url')})"
if not content: content = str(message)
details = [
f"id: `{self.id}`",
f"model: `{self.model}`",
f"finish_reason: `{self.choices[0].finish_reason}`"
]
details = [f"id: `{self.id}`", f"model: `{self.model}`", f"finish_reason: `{self.choices[0].finish_reason}`"]
if hasattr(self, 'usage') and self.usage: details.append(f"usage: `{self.usage}`")
det_str = '\n- '.join(details)

return f"""{content}

<details>
Expand All @@ -63,23 +58,37 @@ def _repr_markdown_(self: litellm.ModelResponse):
</details>"""

# %% ../nbs/00_core.ipynb
# Register claude-sonnet-4-5 pricing and capability metadata with litellm so that
# cost tracking and feature checks work for this model.
# NOTE(review): costs are per token (e.g. 3e-06 USD/input token) — verify against
# Anthropic's current price sheet before relying on computed costs.
register_model({
"claude-sonnet-4-5": {
"max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000,
"input_cost_per_token": 3e-06, "output_cost_per_token": 1.5e-05, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07,
"litellm_provider": "anthropic", "mode": "chat",
"supports_function_calling": True, "supports_parallel_function_calling": True, "supports_vision": True, "supports_prompt_caching": True, "supports_response_schema": True, "supports_system_messages": True, "supports_reasoning": True, "supports_assistant_prefill": True,
"supports_tool_choice": True, "supports_computer_use": True
}
});
# Short alias used throughout the notebooks/tests.
sonn45 = "claude-sonnet-4-5"

# %% ../nbs/00_core.ipynb
# Magic-byte signatures: (signature, offset) -> MIME type, or a callable that
# inspects more of the payload and returns a MIME type (or None when the
# container subtype is unrecognized).
_sigs = {
    (b'%PDF', 0): 'application/pdf',
    (b'RIFF', 0): lambda d: 'audio/wav' if d[8:12]==b'WAVE' else 'video/avi' if d[8:12]==b'AVI ' else 'image/webp' if d[8:12]==b'WEBP' else None,
    (b'ID3', 0): 'audio/mp3',
    (b'\xff\xfb', 0): 'audio/mp3',
    (b'\xff\xf3', 0): 'audio/mp3',
    (b'FORM', 0): lambda d: 'audio/aiff' if d[8:12]==b'AIFF' else None,
    (b'OggS', 0): 'audio/ogg',
    (b'fLaC', 0): 'audio/flac',
    (b'ftyp', 4): lambda d: 'video/3gpp' if d[8:11]==b'3gp' else 'video/mp4',
    (b'\x1a\x45\xdf', 0): 'video/webm',
    (b'FLV', 0): 'video/x-flv',
    (b'\x30\x26\xb2\x75', 0): 'video/wmv',
    (b'\x00\x00\x01\xb3', 0): 'video/mpeg',
    # Image signatures replace the former `imghdr` fallback: imghdr was
    # deprecated by PEP 594 and removed in Python 3.13.
    (b'\x89PNG\r\n\x1a\n', 0): 'image/png',
    (b'\xff\xd8\xff', 0): 'image/jpeg',
    (b'GIF87a', 0): 'image/gif',
    (b'GIF89a', 0): 'image/gif',
    (b'II*\x00', 0): 'image/tiff',
    (b'MM\x00*', 0): 'image/tiff',
    (b'BM', 0): 'image/bmp',  # short signature: kept last to minimize false matches
}

def _detect_mime(data):
    "Best-effort MIME detection of `data` from magic bytes; returns None when unrecognized."
    for (sig, pos), mime in _sigs.items():
        if data[pos:pos+len(sig)] == sig:
            res = mime(data) if callable(mime) else mime
            # A container match (e.g. RIFF) may still be an unknown subtype;
            # keep scanning instead of returning None early (previously WEBP
            # was swallowed by the RIFF entry's early return).
            if res is not None: return res
    return None

def _bytes2content(data):
    """Convert raw bytes to a litellm content dict (image, pdf, audio, or video).

    Raises ValueError when the payload's magic bytes match no supported type.
    (The pasted diff had both the old image/pdf-only body and the new multi-media
    body interleaved; this is the final version only.)"""
    mtype = _detect_mime(data)
    if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
    encoded = base64.b64encode(data).decode("utf-8")
    # Images use litellm's image_url part; all other media go through the
    # generic file part with an inline data: URL.
    if mtype.startswith('image/'): return {'type': 'image_url', 'image_url': f'data:{mtype};base64,{encoded}'}
    return {'type': 'file', 'file': {'file_data': f'data:{mtype};base64,{encoded}'}}

# %% ../nbs/00_core.ipynb
def _add_cache_control(msg, # LiteLLM formatted msg
Expand Down Expand Up @@ -267,7 +276,7 @@ def _prep_msg(self, msg=None, prefill=None):
cache_idxs = L(self.cache_idxs).filter().map(lambda o: o-1 if o>0 else o)
else:
cache_idxs = self.cache_idxs
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache, cache_idxs, self.ttl)
if msg: self.hist = mk_msgs(self.hist+[msg], self.cache and 'claude' in self.model, cache_idxs, self.ttl)
pf = [{"role":"assistant","content":prefill}] if prefill else []
return sp + self.hist + pf

Expand All @@ -288,6 +297,7 @@ def _call(self, msg=None, prefill=None, temp=None, think=None, search=None, stre
tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
# temperature is not supported when reasoning
temperature=None if think else ifnone(temp,self.temp),
caching=self.cache and 'claude' not in self.model,
**kwargs)
if stream:
if prefill: yield _mk_prefill(prefill)
Expand Down Expand Up @@ -324,35 +334,6 @@ def __call__(self,
elif return_all: return list(result_gen) # toolloop behavior
else: return last(result_gen) # normal chat behavior

# %% ../nbs/00_core.ipynb
@patch
def print_hist(self:Chat):
    "Print each message in `self.hist` on its own line, separated by a blank line."
    for r in self.hist: print(r, end='\n\n')

# %% ../nbs/00_core.ipynb
def random_tool_id():
    """Generate a random tool-call ID with the Anthropic-style 'toolu_' prefix.

    Returns a string of the form 'toolu_' + 25 random alphanumeric characters.
    Not cryptographically secure (uses `random`), which is fine for message ids."""
    random_part = ''.join(random.choices(string.ascii_letters + string.digits, k=25))
    return f'toolu_{random_part}'

# %% ../nbs/00_core.ipynb
def mk_tc(func, args, tcid=None, idx=1):
    """Build a litellm-style tool-call dict.

    `func`: tool name; `args`: JSON-encoded argument string; `tcid`: tool-call id
    (a fresh 'toolu_' id is minted when omitted); `idx`: position of this call in
    the assistant message's tool_calls list."""
    if not tcid: tcid = random_tool_id()
    return {'index': idx, 'function': {'arguments': args, 'name': func}, 'id': tcid, 'type': 'function'}

# %% ../nbs/00_core.ipynb
def mk_tc_req(content, tcs):
    """Build an assistant `Message` that requests the tool calls `tcs`.

    The tool calls are normalized back to plain dicts (Message appears to wrap
    them in model objects — confirm against litellm.Message) so the message
    round-trips cleanly through history serialization."""
    msg = Message(content=content, role='assistant', tool_calls=tcs, function_call=None)
    msg.tool_calls = [{**dict(tc), 'function': dict(tc['function'])} for tc in msg.tool_calls]
    return msg

# %% ../nbs/00_core.ipynb
def mk_tc_result(tc, result):
    "Build a 'tool'-role message answering tool call `tc` with `result`."
    return dict(tool_call_id=tc['id'], role='tool', name=tc['function']['name'], content=result)

# %% ../nbs/00_core.ipynb
def mk_tc_results(tcq, results):
    "Pair each tool call on request message `tcq` with its corresponding result message."
    return [mk_tc_result(tc, res) for tc, res in zip(tcq.tool_calls, results)]

# %% ../nbs/00_core.ipynb
async def _alite_call_func(tc, ns, raise_on_err=True):
try: fargs = json.loads(tc.function.arguments)
Expand Down Expand Up @@ -383,6 +364,7 @@ async def _call(self, msg=None, prefill=None, temp=None, think=None, search=None
tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
# temperature is not supported when reasoning
temperature=None if think else ifnone(temp,self.temp),
caching=self.cache and 'claude' not in self.model,
**kwargs)
if stream:
if prefill: yield _mk_prefill(prefill)
Expand Down Expand Up @@ -442,20 +424,19 @@ def mk_tr_details(tr, tc, mx=2000):
# %% ../nbs/00_core.ipynb
class AsyncStreamFormatter:
def __init__(self, include_usage=False, mx=2000):
    # Formatter state: accumulated output text, pending tool calls keyed by
    # index, whether to append usage info, and truncation length for tool
    # details. (The pasted diff kept both the old assignment — which carried a
    # `self.think` flag — and the new one; only the final version remains, since
    # the updated format_item infers reasoning state from self.outp instead.)
    self.outp,self.tcs,self.include_usage,self.mx = '',{},include_usage,mx

def format_item(self, o):
"Format a single item from the response stream."
res = ''
if isinstance(o, ModelResponseStream):
d = o.choices[0].delta
if nested_idx(d, 'reasoning_content'):
self.think = True
res += '🧠'
elif self.think:
self.think = False
res += '\n\n'
if c:=d.content: res+=c
if nested_idx(d, 'reasoning_content') and d['reasoning_content']!='{"text": ""}':
res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠' # gemini can interleave reasoning
elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
if c:=d.content: # gemini has text content in last reasoning chunk
res+=f"\n\n{c}" if res and res[-1] == '🧠' else c
for img in getattr(d, 'images', []): res += f"\n\n![generated image]({nested_idx(img, 'image_url', 'url')})\n\n"
elif isinstance(o, ModelResponse):
if self.include_usage: res += f"\nUsage: {o.usage}"
if c:=getattr(contents(o),'tool_calls',None):
Expand Down
14 changes: 11 additions & 3 deletions lisette/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
def _log_usage(self, response_obj, response_cost, start_time, end_time):
    """Persist one `Usage` row for a completed litellm call.

    `start_time`/`end_time` are accepted for the callback signature but unused.
    (The pasted diff interleaved the old single-statement body with the new
    provider-tolerant one; this keeps only the final version, which uses
    nested_idx so providers lacking anthropic-style cache fields don't raise.)"""
    usage = response_obj.usage
    ptd = usage.prompt_tokens_details
    self.usage.insert(Usage(timestamp=time.time(),
                            model=response_obj.model,
                            user_id=self.user_id_fn(),
                            prompt_tokens=usage.prompt_tokens,
                            completion_tokens=usage.completion_tokens,
                            total_tokens=usage.total_tokens,
                            cached_tokens=ptd.cached_tokens if ptd else 0,  # used by gemini (read tokens)
                            cache_creation_tokens=nested_idx(usage, 'cache_creation_input_tokens'),
                            cache_read_tokens=nested_idx(usage, 'cache_read_input_tokens'),  # used by anthropic
                            web_search_requests=nested_idx(usage, 'server_tool_use', 'web_search_requests'),
                            response_cost=response_cost))

def user_id_fn(self):
    "Return an identifier for the current user; applications must override this before constructing the logger."
    raise NotImplementedError('Please implement `LisetteUsageLogger.user_id_fn` before initializing, e.g using fastcore.patch.')

Expand Down
Loading