@@ -17,6 +17,7 @@
     ContentJson,
     ContentPDF,
     ContentText,
+    ContentThinking,
     ContentToolRequest,
     ContentToolResult,
     ContentToolResultImage,
@@ -41,6 +42,8 @@
     MessageParam,
     RawMessageStreamEvent,
     TextBlock,
+    ThinkingBlock,
+    ThinkingBlockParam,
     ToolParam,
     ToolUseBlock,
 )
@@ -51,6 +54,7 @@
 from anthropic.types.messages.batch_create_params import Request as BatchRequest
 from anthropic.types.model_param import ModelParam
 from anthropic.types.text_block_param import TextBlockParam
+from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
 from anthropic.types.tool_result_block_param import ToolResultBlockParam
 from anthropic.types.tool_use_block_param import ToolUseBlockParam
 
@@ -62,6 +66,7 @@
         ToolUseBlockParam,
         ToolResultBlockParam,
         DocumentBlockParam,
+        ThinkingBlockParam,
     ]
 else:
     Message = object
@@ -72,154 +77,168 @@ def ChatAnthropic(
     *,
     system_prompt: Optional[str] = None,
     model: "Optional[ModelParam]" = None,
-    api_key: Optional[str] = None,
     max_tokens: int = 4096,
     cache: Literal["5m", "1h", "none"] = "5m",
+    reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
+    api_key: Optional[str] = None,
     kwargs: Optional["ChatClientArgs"] = None,
 ) -> Chat["SubmitInputArgs", Message]:
     """
-    Chat with an Anthropic Claude model.
-
-    [Anthropic](https://www.anthropic.com) provides a number of chat-based
-    models under the [Claude](https://www.anthropic.com/claude) moniker.
-
-    Prerequisites
-    -------------
-
-    ::: {.callout-note}
-    ## API key
-
-    Note that a Claude Pro membership does not give you the ability to call
-    models via the API. You will need to go to the [developer
-    console](https://console.anthropic.com/account/keys) to sign up (and pay
-    for) a developer account that will give you an API key that you can use with
-    this package.
-    :::
-
-    ::: {.callout-note}
-    ## Python requirements
-
-    `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
-    :::
-
-    Examples
-    --------
-
-    ```python
-    import os
-    from chatlas import ChatAnthropic
-
-    chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-    chat.chat("What is the capital of France?")
-    ```
-
-    Parameters
-    ----------
-    system_prompt
-        A system prompt to set the behavior of the assistant.
-    model
-        The model to use for the chat. The default, None, will pick a reasonable
-        default, and warn you about it. We strongly recommend explicitly
-        choosing a model for all but the most casual use.
-    api_key
-        The API key to use for authentication. You generally should not supply
-        this directly, but instead set the `ANTHROPIC_API_KEY` environment
-        variable.
-    max_tokens
-        Maximum number of tokens to generate before stopping.
-    cache
-        How long to cache inputs? Defaults to "5m" (five minutes).
-        Set to "none" to disable caching or "1h" to cache for one hour.
-        See the Caching section for details.
-    kwargs
-        Additional arguments to pass to the `anthropic.Anthropic()` client
-        constructor.
-
-    Returns
-    -------
-    Chat
-        A Chat object.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    ANTHROPIC_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatAnthropic
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatAnthropic()
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export ANTHROPIC_API_KEY=...
-    ```
-
-    Caching
-    -------
-
-    Caching with Claude is a bit more complicated than with other providers, but
-    we believe that on average it will save you both money and time, so we have
-    enabled it by default. With other providers, like OpenAI and Google,
-    you only pay for cache reads, which cost 10% of the normal price. With
-    Claude, you also pay for cache writes, which cost 125% of the normal price
-    for 5-minute caching and 200% of the normal price for 1-hour caching.
-
-    How does this affect the total cost of a conversation? Imagine the first
-    turn sends 1000 input tokens and receives 200 output tokens. The second
-    turn must first send both the input and output from the previous turn
-    (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
-    back.
-
-    To compare the prices of these two approaches, we can ignore the cost of
-    output tokens, because it is the same for both. How much will the input
-    tokens cost? If we don't use caching, we send 1000 tokens in the first turn
-    and 2200 (1000 + 200 + 1000) tokens in the second turn, for a total of 3200
-    tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
-    tokens in the first turn. In the second turn, 1000 of the input tokens will
-    be cached, so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
-    tokens. That makes a total of 2850 tokens, i.e. about 11% fewer tokens,
-    decreasing the overall cost.
-
-    Obviously, the details will vary from conversation to conversation, but
-    if you have a large system prompt that you re-use many times, you should
-    expect to see larger savings. You can see exactly how many input and
-    cache input tokens each turn uses, along with the total cost,
-    with `chat.get_tokens()`. If you don't see savings for your use case, you
-    can suppress caching with `cache="none"`.
-
-    Note: Claude only caches longer prompts; caching requires at least
-    1024-4096 tokens, depending on the model. So don't be surprised if you
-    don't see any difference with caching if you have a short prompt.
-
-    See all the details at
-    <https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
+    Chat with an Anthropic Claude model.
+
+    [Anthropic](https://www.anthropic.com) provides a number of chat-based
+    models under the [Claude](https://www.anthropic.com/claude) moniker.
+
+    Prerequisites
+    -------------
+
+    ::: {.callout-note}
+    ## API key
+
+    Note that a Claude Pro membership does not give you the ability to call
+    models via the API. You will need to go to the [developer
+    console](https://console.anthropic.com/account/keys) to sign up (and pay
+    for) a developer account that will give you an API key that you can use with
+    this package.
+    :::
+
+    ::: {.callout-note}
+    ## Python requirements
+
+    `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
+    :::
+
+    Examples
+    --------
+
+    ```python
+    import os
+    from chatlas import ChatAnthropic
+
+    chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+    chat.chat("What is the capital of France?")
+    ```
+
+    Parameters
+    ----------
+    system_prompt
+        A system prompt to set the behavior of the assistant.
+    model
+        The model to use for the chat. The default, None, will pick a reasonable
+        default, and warn you about it. We strongly recommend explicitly
+        choosing a model for all but the most casual use.
+    max_tokens
+        Maximum number of tokens to generate before stopping.
+    cache
+        How long to cache inputs? Defaults to "5m" (five minutes).
+        Set to "none" to disable caching or "1h" to cache for one hour.
+        See the Caching section for details.
+    reasoning
+        How many tokens Claude may spend on internal reasoning. Pass either an
+        int token budget or a full `ThinkingConfigEnabledParam`; an int is
+        normalized to `{"type": "enabled", "budget_tokens": ...}`. The budget
+        must be at least 1024 and less than `max_tokens`. Larger budgets can
+        enable more thorough analysis of complex problems, improving response
+        quality. See [extended
+        thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
+        for details.
+    api_key
+        The API key to use for authentication. You generally should not supply
+        this directly, but instead set the `ANTHROPIC_API_KEY` environment
+        variable.
+    kwargs
+        Additional arguments to pass to the `anthropic.Anthropic()` client
+        constructor.
+
+    Returns
+    -------
+    Chat
+        A Chat object.
+
+    Note
+    ----
+    Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
+    is the simplest way to get started, and is fine for interactive use, but is
+    problematic for code that may be shared with others.
+
+    Instead, consider using environment variables or a configuration file to manage
+    your credentials. One popular way to manage credentials is to use a `.env` file
+    to store your credentials, and then use the `python-dotenv` package to load them
+    into your environment.
+
+    ```shell
+    pip install python-dotenv
+    ```
+
+    ```shell
+    # .env
+    ANTHROPIC_API_KEY=...
+    ```
+
+    ```python
+    from chatlas import ChatAnthropic
+    from dotenv import load_dotenv
+
+    load_dotenv()
+    chat = ChatAnthropic()
+    chat.console()
+    ```
+
+    Another, more general, solution is to load your environment variables into the shell
+    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
+
+    ```shell
+    export ANTHROPIC_API_KEY=...
+    ```
+
+    Caching
+    -------
+
+    Caching with Claude is a bit more complicated than with other providers, but
+    we believe that on average it will save you both money and time, so we have
+    enabled it by default. With other providers, like OpenAI and Google,
+    you only pay for cache reads, which cost 10% of the normal price. With
+    Claude, you also pay for cache writes, which cost 125% of the normal price
+    for 5-minute caching and 200% of the normal price for 1-hour caching.
+
+    How does this affect the total cost of a conversation? Imagine the first
+    turn sends 1000 input tokens and receives 200 output tokens. The second
+    turn must first send both the input and output from the previous turn
+    (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
+    back.
+
+    To compare the prices of these two approaches, we can ignore the cost of
+    output tokens, because it is the same for both. How much will the input
+    tokens cost? If we don't use caching, we send 1000 tokens in the first turn
+    and 2200 (1000 + 200 + 1000) tokens in the second turn, for a total of 3200
+    tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
+    tokens in the first turn. In the second turn, 1000 of the input tokens will
+    be cached, so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
+    tokens. That makes a total of 2850 tokens, i.e. about 11% fewer tokens,
+    decreasing the overall cost.
+
+    Obviously, the details will vary from conversation to conversation, but
+    if you have a large system prompt that you re-use many times, you should
+    expect to see larger savings. You can see exactly how many input and
+    cache input tokens each turn uses, along with the total cost,
+    with `chat.get_tokens()`. If you don't see savings for your use case, you
+    can suppress caching with `cache="none"`.
+
+    Note: Claude only caches longer prompts; caching requires at least
+    1024-4096 tokens, depending on the model. So don't be surprised if you
+    don't see any difference with caching if you have a short prompt.
+
+    See all the details at
+    <https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
     """
 
     if model is None:
         model = log_model_default("claude-sonnet-4-0")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if isinstance(reasoning, int):
+            reasoning = {"type": "enabled", "budget_tokens": reasoning}
+        kwargs_chat = {"thinking": reasoning}
+
     return Chat(
         provider=AnthropicProvider(
             api_key=api_key,
@@ -229,6 +248,7 @@ def ChatAnthropic(
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
 
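A minimal usage sketch of the new `reasoning` parameter (model name and token
numbers are illustrative; assumes `ANTHROPIC_API_KEY` is set). An int budget is
normalized to the `ThinkingConfigEnabledParam` form exactly as in the hunk
above, so the two constructions below are equivalent:

```python
from chatlas import ChatAnthropic

# An int is normalized to {"type": "enabled", "budget_tokens": 2048} ...
chat = ChatAnthropic(model="claude-sonnet-4-0", max_tokens=8192, reasoning=2048)

# ... so this spelling is equivalent:
chat = ChatAnthropic(
    model="claude-sonnet-4-0",
    max_tokens=8192,
    reasoning={"type": "enabled", "budget_tokens": 2048},
)

chat.chat("How many prime numbers are there between 1000 and 1100?")
```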
 
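The cache-cost arithmetic from the docstring, written out as a sketch (the
multipliers are the ones stated there: 1.25x for 5-minute cache writes, 0.1x
for cache reads):

```python
turn1_in, turn1_out, turn2_new = 1000, 200, 1000

# No caching: turn 2 resends turn 1's input and output plus the new input.
no_cache = turn1_in + (turn1_in + turn1_out + turn2_new)  # 3200

# 5-minute caching: turn 1 is a cache write (1.25x); on turn 2 the first
# 1000 tokens are a cache read (0.1x) and the rest is a fresh cache write.
with_cache = turn1_in * 1.25 + turn1_in * 0.1 + (turn1_out + turn2_new) * 1.25

print(no_cache, with_cache)  # 3200 2850.0 -> about 11% fewer token-equivalents
```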
@@ -451,6 +471,12 @@ def stream_merge_chunks(self, completion, chunk):
                 if not isinstance(this_content.input, str):
                     this_content.input = ""  # type: ignore
                 this_content.input += json_delta  # type: ignore
+            elif chunk.delta.type == "thinking_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.thinking += chunk.delta.thinking
+            elif chunk.delta.type == "signature_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.signature += chunk.delta.signature
         elif chunk.type == "content_block_stop":
             this_content = completion.content[chunk.index]
             if this_content.type == "tool_use" and isinstance(this_content.input, str):
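For intuition, a standalone sketch of what the two new delta branches
accumulate. A thinking block streams as a series of `thinking_delta` events
followed by a `signature_delta`; the tuples below are simplified stand-ins for
the `anthropic` SDK's event objects:

```python
from dataclasses import dataclass


@dataclass
class ThinkingAccumulator:
    # Mirrors the two ThinkingBlock fields the deltas append to.
    thinking: str = ""
    signature: str = ""


block = ThinkingAccumulator()
events = [
    ("thinking_delta", "Let me work through this"),
    ("thinking_delta", " step by step."),
    ("signature_delta", "EqQBCg=="),  # opaque; required to replay the block
]
for kind, text in events:
    if kind == "thinking_delta":
        block.thinking += text
    elif kind == "signature_delta":
        block.signature += text

assert block.thinking == "Let me work through this step by step."
assert block.signature == "EqQBCg=="
```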
@@ -656,6 +682,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
         res["content"] = content.get_model_value()  # type: ignore
 
         return res
+    elif isinstance(content, ContentThinking):
+        extra = content.extra or {}
+        return {
+            "type": "thinking",
+            "thinking": content.thinking,
+            "signature": extra.get("signature", ""),
+        }
 
     raise ValueError(f"Unknown content type: {type(content)}")
 
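The `ContentThinking` branch above converts a stored thinking block back into
Anthropic's wire format. A sketch of the mapping, using a hypothetical stand-in
class (the empty-string fallback covers content whose `extra` never recorded a
signature):

```python
class FakeContentThinking:
    # Hypothetical stand-in for chatlas's ContentThinking.
    def __init__(self, thinking: str, extra: "dict | None" = None):
        self.thinking = thinking
        self.extra = extra


content = FakeContentThinking("Consider the primes...", extra={"signature": "EqQBCg=="})
extra = content.extra or {}
block = {
    "type": "thinking",
    "thinking": content.thinking,
    "signature": extra.get("signature", ""),
}
assert block == {
    "type": "thinking",
    "thinking": "Consider the primes...",
    "signature": "EqQBCg==",
}
```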
@@ -709,6 +742,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
                         arguments=content.input,
                     )
                 )
+            elif content.type == "thinking":
+                contents.append(
+                    ContentThinking(
+                        thinking=content.thinking,
+                        extra={"signature": content.signature},
+                    )
+                )
 
         return Turn(
             "assistant",
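And the inverse direction: `_as_turn` stashes the signature in `extra` so that
`_as_content_block` can reattach it when the turn is replayed to the API. A
simplified round-trip sketch (plain dicts stand in for the SDK block and for
`ContentThinking`):

```python
# A thinking block as it appears in an assistant Message (simplified).
api_block = {
    "type": "thinking",
    "thinking": "Consider the primes...",
    "signature": "EqQBCg==",
}

# _as_turn direction: keep the text, stash the signature in `extra`.
content = {
    "thinking": api_block["thinking"],
    "extra": {"signature": api_block["signature"]},
}

# _as_content_block direction: rebuild the wire-format block.
replayed = {
    "type": "thinking",
    "thinking": content["thinking"],
    "signature": (content["extra"] or {}).get("signature", ""),
}
assert replayed == api_block  # the signature survives the round trip
```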