Skip to content

Commit 3832a34

Browse files
committed
feat: better support for reasoning/thinking
1 parent edb5615 commit 3832a34

File tree

3 files changed

+223
-149
lines changed

3 files changed

+223
-149
lines changed

chatlas/_provider_anthropic.py

Lines changed: 178 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
ContentJson,
1818
ContentPDF,
1919
ContentText,
20+
ContentThinking,
2021
ContentToolRequest,
2122
ContentToolResult,
2223
ContentToolResultImage,
@@ -41,6 +42,8 @@
4142
MessageParam,
4243
RawMessageStreamEvent,
4344
TextBlock,
45+
ThinkingBlock,
46+
ThinkingBlockParam,
4447
ToolParam,
4548
ToolUseBlock,
4649
)
@@ -51,6 +54,7 @@
5154
from anthropic.types.messages.batch_create_params import Request as BatchRequest
5255
from anthropic.types.model_param import ModelParam
5356
from anthropic.types.text_block_param import TextBlockParam
57+
from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
5458
from anthropic.types.tool_result_block_param import ToolResultBlockParam
5559
from anthropic.types.tool_use_block_param import ToolUseBlockParam
5660

@@ -62,6 +66,7 @@
6266
ToolUseBlockParam,
6367
ToolResultBlockParam,
6468
DocumentBlockParam,
69+
ThinkingBlockParam,
6570
]
6671
else:
6772
Message = object
@@ -72,154 +77,168 @@ def ChatAnthropic(
7277
*,
7378
system_prompt: Optional[str] = None,
7479
model: "Optional[ModelParam]" = None,
75-
api_key: Optional[str] = None,
7680
max_tokens: int = 4096,
7781
cache: Literal["5m", "1h", "none"] = "5m",
82+
reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
83+
api_key: Optional[str] = None,
7884
kwargs: Optional["ChatClientArgs"] = None,
7985
) -> Chat["SubmitInputArgs", Message]:
8086
"""
81-
Chat with an Anthropic Claude model.
82-
83-
[Anthropic](https://www.anthropic.com) provides a number of chat based
84-
models under the [Claude](https://www.anthropic.com/claude) moniker.
85-
86-
Prerequisites
87-
-------------
88-
89-
::: {.callout-note}
90-
## API key
91-
92-
Note that a Claude Pro membership does not give you the ability to call
93-
models via the API. You will need to go to the [developer
94-
console](https://console.anthropic.com/account/keys) to sign up (and pay
95-
for) a developer account that will give you an API key that you can use with
96-
this package.
97-
:::
98-
99-
::: {.callout-note}
100-
## Python requirements
101-
102-
`ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
103-
:::
104-
105-
Examples
106-
--------
107-
108-
```python
109-
import os
110-
from chatlas import ChatAnthropic
111-
112-
chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
113-
chat.chat("What is the capital of France?")
114-
```
115-
116-
Parameters
117-
----------
118-
system_prompt
119-
A system prompt to set the behavior of the assistant.
120-
model
121-
The model to use for the chat. The default, None, will pick a reasonable
122-
default, and warn you about it. We strongly recommend explicitly
123-
choosing a model for all but the most casual use.
124-
api_key
125-
The API key to use for authentication. You generally should not supply
126-
this directly, but instead set the `ANTHROPIC_API_KEY` environment
127-
variable.
128-
max_tokens
129-
Maximum number of tokens to generate before stopping.
130-
cache
131-
How long to cache inputs? Defaults to "5m" (five minutes).
132-
Set to "none" to disable caching or "1h" to cache for one hour.
133-
See the Caching section for details.
134-
kwargs
135-
Additional arguments to pass to the `anthropic.Anthropic()` client
136-
constructor.
137-
138-
Returns
139-
-------
140-
Chat
141-
A Chat object.
142-
143-
Note
144-
----
145-
Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
146-
is the simplest way to get started, and is fine for interactive use, but is
147-
problematic for code that may be shared with others.
148-
149-
Instead, consider using environment variables or a configuration file to manage
150-
your credentials. One popular way to manage credentials is to use a `.env` file
151-
to store your credentials, and then use the `python-dotenv` package to load them
152-
into your environment.
153-
154-
```shell
155-
pip install python-dotenv
156-
```
157-
158-
```shell
159-
# .env
160-
ANTHROPIC_API_KEY=...
161-
```
162-
163-
```python
164-
from chatlas import ChatAnthropic
165-
from dotenv import load_dotenv
166-
167-
load_dotenv()
168-
chat = ChatAnthropic()
169-
chat.console()
170-
```
171-
172-
Another, more general, solution is to load your environment variables into the shell
173-
before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
174-
175-
```shell
176-
export ANTHROPIC_API_KEY=...
177-
```
178-
179-
Caching
180-
-------
181-
182-
Caching with Claude is a bit more complicated than other providers but we
183-
believe that on average it will save you both money and time, so we have
184-
enabled it by default. With other providers, like OpenAI and Google,
185-
you only pay for cache reads, which cost 10% of the normal price. With
186-
Claude, you also pay for cache writes, which cost 125% of the normal price
187-
for 5 minute caching and 200% of the normal price for 1 hour caching.
188-
189-
How does this affect the total cost of a conversation? Imagine the first
190-
turn sends 1000 input tokens and receives 200 output tokens. The second
191-
turn must first send both the input and output from the previous turn
192-
(1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
193-
back.
194-
195-
To compare the prices of these two approaches we can ignore the cost of
196-
output tokens, because they are the same for both. How much will the input
197-
tokens cost? If we don't use caching, we send 1000 tokens in the first turn
198-
and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200
199-
tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
200-
tokens in the first turn. In the second turn, 1000 of the input tokens will
201-
be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
202-
tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens,
203-
decreasing the overall cost.
204-
205-
Obviously, the details will vary from conversation to conversation, but
206-
if you have a large system prompt that you re-use many times you should
207-
expect to see larger savings. You can see exactly how many input and
208-
cache input tokens each turn uses, along with the total cost,
209-
with `chat.get_tokens()`. If you don't see savings for your use case, you can
210-
suppress caching with `cache="none"`.
211-
212-
Note: Claude will only cache longer prompts, with caching requiring at least
213-
1024-4096 tokens, depending on the model. So don't be surprised if you
214-
don't see any differences with caching if you have a short prompt.
215-
216-
See all the details at
217-
<https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
87+
Chat with an Anthropic Claude model.
88+
89+
[Anthropic](https://www.anthropic.com) provides a number of chat-based
90+
models under the [Claude](https://www.anthropic.com/claude) moniker.
91+
92+
Prerequisites
93+
-------------
94+
95+
::: {.callout-note}
96+
## API key
97+
98+
Note that a Claude Pro membership does not give you the ability to call
99+
models via the API. You will need to go to the [developer
100+
console](https://console.anthropic.com/account/keys) to sign up (and pay
101+
for) a developer account that will give you an API key that you can use with
102+
this package.
103+
:::
104+
105+
::: {.callout-note}
106+
## Python requirements
107+
108+
`ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
109+
:::
110+
111+
Examples
112+
--------
113+
114+
```python
115+
import os
116+
from chatlas import ChatAnthropic
117+
118+
chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
119+
chat.chat("What is the capital of France?")
120+
```
121+
122+
Parameters
123+
----------
124+
system_prompt
125+
A system prompt to set the behavior of the assistant.
126+
model
127+
The model to use for the chat. The default, None, will pick a reasonable
128+
default, and warn you about it. We strongly recommend explicitly
129+
choosing a model for all but the most casual use.
130+
max_tokens
131+
Maximum number of tokens to generate before stopping.
132+
cache
133+
How long to cache inputs? Defaults to "5m" (five minutes).
134+
Set to "none" to disable caching or "1h" to cache for one hour.
135+
See the Caching section for details.
136+
reasoning
137+
Maximum number of tokens Claude may spend on reasoning (or a thinking config dict). The budget must be
138+
≥1024 and less than `max_tokens`. Larger budgets can enable more
139+
thorough analysis for complex problems, improving response quality. See
140+
[extended
141+
thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
142+
for details.
143+
api_key
144+
The API key to use for authentication. You generally should not supply
145+
this directly, but instead set the `ANTHROPIC_API_KEY` environment
146+
variable.
147+
kwargs
148+
Additional arguments to pass to the `anthropic.Anthropic()` client
149+
constructor.
150+
151+
Returns
152+
-------
153+
Chat
154+
A Chat object.
155+
156+
Note
157+
----
158+
Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
159+
is the simplest way to get started, and is fine for interactive use, but is
160+
problematic for code that may be shared with others.
161+
162+
Instead, consider using environment variables or a configuration file to manage
163+
your credentials. One popular way to manage credentials is to use a `.env` file
164+
to store your credentials, and then use the `python-dotenv` package to load them
165+
into your environment.
166+
167+
```shell
168+
pip install python-dotenv
169+
```
170+
171+
```shell
172+
# .env
173+
ANTHROPIC_API_KEY=...
174+
```
175+
176+
```python
177+
from chatlas import ChatAnthropic
178+
from dotenv import load_dotenv
179+
180+
load_dotenv()
181+
chat = ChatAnthropic()
182+
chat.console()
183+
```
184+
185+
Another, more general, solution is to load your environment variables into the shell
186+
before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
187+
188+
```shell
189+
export ANTHROPIC_API_KEY=...
190+
```
191+
192+
Caching
193+
-------
194+
195+
Caching with Claude is a bit more complicated than with other providers, but we
196+
believe that on average it will save you both money and time, so we have
197+
enabled it by default. With other providers, like OpenAI and Google,
198+
you only pay for cache reads, which cost 10% of the normal price. With
199+
Claude, you also pay for cache writes, which cost 125% of the normal price
200+
for 5 minute caching and 200% of the normal price for 1 hour caching.
201+
202+
How does this affect the total cost of a conversation? Imagine the first
203+
turn sends 1000 input tokens and receives 200 output tokens. The second
204+
turn must first send both the input and output from the previous turn
205+
(1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
206+
back.
207+
208+
To compare the prices of these two approaches we can ignore the cost of
209+
output tokens, because they are the same for both. How much will the input
210+
tokens cost? If we don't use caching, we send 1000 tokens in the first turn
211+
and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200
212+
tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
213+
tokens in the first turn. In the second turn, 1000 of the input tokens will
214+
be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
215+
tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens,
216+
decreasing the overall cost.
217+
218+
Obviously, the details will vary from conversation to conversation, but
219+
if you have a large system prompt that you re-use many times you should
220+
expect to see larger savings. You can see exactly how many input and
221+
cache input tokens each turn uses, along with the total cost,
222+
with `chat.get_tokens()`. If you don't see savings for your use case, you can
223+
suppress caching with `cache="none"`.
224+
225+
Note: Claude will only cache longer prompts, with caching requiring at least
226+
1024-4096 tokens, depending on the model. So don't be surprised if you
227+
don't see any differences with caching if you have a short prompt.
228+
229+
See all the details at
230+
<https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
218231
"""
219232

220233
if model is None:
221234
model = log_model_default("claude-sonnet-4-0")
222235

236+
kwargs_chat: "SubmitInputArgs" = {}
237+
if reasoning is not None:
238+
if isinstance(reasoning, int):
239+
reasoning = {"type": "enabled", "budget_tokens": reasoning}
240+
kwargs_chat = {"thinking": reasoning}
241+
223242
return Chat(
224243
provider=AnthropicProvider(
225244
api_key=api_key,
@@ -229,6 +248,7 @@ def ChatAnthropic(
229248
kwargs=kwargs,
230249
),
231250
system_prompt=system_prompt,
251+
kwargs_chat=kwargs_chat,
232252
)
233253

234254

@@ -451,6 +471,12 @@ def stream_merge_chunks(self, completion, chunk):
451471
if not isinstance(this_content.input, str):
452472
this_content.input = "" # type: ignore
453473
this_content.input += json_delta # type: ignore
474+
elif chunk.delta.type == "thinking_delta":
475+
this_content = cast("ThinkingBlock", this_content)
476+
this_content.thinking += chunk.delta.thinking
477+
elif chunk.delta.type == "signature_delta":
478+
this_content = cast("ThinkingBlock", this_content)
479+
this_content.signature += chunk.delta.signature
454480
elif chunk.type == "content_block_stop":
455481
this_content = completion.content[chunk.index]
456482
if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -656,6 +682,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
656682
res["content"] = content.get_model_value() # type: ignore
657683

658684
return res
685+
elif isinstance(content, ContentThinking):
686+
extra = content.extra or {}
687+
return {
688+
"type": "thinking",
689+
"thinking": content.thinking,
690+
"signature": extra.get("signature", ""),
691+
}
659692

660693
raise ValueError(f"Unknown content type: {type(content)}")
661694

@@ -709,6 +742,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
709742
arguments=content.input,
710743
)
711744
)
745+
elif content.type == "thinking":
746+
contents.append(
747+
ContentThinking(
748+
thinking=content.thinking,
749+
extra={"signature": content.signature},
750+
)
751+
)
712752

713753
return Turn(
714754
"assistant",

0 commit comments

Comments
 (0)