From 3832a34e94666e38a6e4eb4be5b5bc7e1cb55b91 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 4 Nov 2025 12:19:05 -0600 Subject: [PATCH 1/4] feat: better support for reasoning/thinking --- chatlas/_provider_anthropic.py | 316 +++++++++++++++++++-------------- chatlas/_provider_google.py | 16 +- chatlas/_provider_openai.py | 40 ++++- 3 files changed, 223 insertions(+), 149 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index fb5eb1a6..09612c50 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -17,6 +17,7 @@ ContentJson, ContentPDF, ContentText, + ContentThinking, ContentToolRequest, ContentToolResult, ContentToolResultImage, @@ -41,6 +42,8 @@ MessageParam, RawMessageStreamEvent, TextBlock, + ThinkingBlock, + ThinkingBlockParam, ToolParam, ToolUseBlock, ) @@ -51,6 +54,7 @@ from anthropic.types.messages.batch_create_params import Request as BatchRequest from anthropic.types.model_param import ModelParam from anthropic.types.text_block_param import TextBlockParam + from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam from anthropic.types.tool_result_block_param import ToolResultBlockParam from anthropic.types.tool_use_block_param import ToolUseBlockParam @@ -62,6 +66,7 @@ ToolUseBlockParam, ToolResultBlockParam, DocumentBlockParam, + ThinkingBlockParam, ] else: Message = object @@ -72,154 +77,168 @@ def ChatAnthropic( *, system_prompt: Optional[str] = None, model: "Optional[ModelParam]" = None, - api_key: Optional[str] = None, max_tokens: int = 4096, cache: Literal["5m", "1h", "none"] = "5m", + reasoning: Optional["int | ThinkingConfigEnabledParam"] = None, + api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Message]: """ - Chat with an Anthropic Claude model. - - [Anthropic](https://www.anthropic.com) provides a number of chat based - models under the [Claude](https://www.anthropic.com/claude) moniker. - - Prerequisites - ------------- - - ::: {.callout-note} - ## API key - - Note that a Claude Pro membership does not give you the ability to call - models via the API. You will need to go to the [developer - console](https://console.anthropic.com/account/keys) to sign up (and pay - for) a developer account that will give you an API key that you can use with - this package. - ::: - - ::: {.callout-note} - ## Python requirements - - `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. - ::: - - Examples - -------- - - ```python - import os - from chatlas import ChatAnthropic - - chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) - chat.chat("What is the capital of France?") - ``` - - Parameters - ---------- - system_prompt - A system prompt to set the behavior of the assistant. - model - The model to use for the chat. The default, None, will pick a reasonable - default, and warn you about it. We strongly recommend explicitly - choosing a model for all but the most casual use. - api_key - The API key to use for authentication. You generally should not supply - this directly, but instead set the `ANTHROPIC_API_KEY` environment - variable. - max_tokens - Maximum number of tokens to generate before stopping. - cache - How long to cache inputs? Defaults to "5m" (five minutes). - Set to "none" to disable caching or "1h" to cache for one hour. - See the Caching section for details. - kwargs - Additional arguments to pass to the `anthropic.Anthropic()` client - constructor. 
- - Returns - ------- - Chat - A Chat object. - - Note - ---- - Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) - is the simplest way to get started, and is fine for interactive use, but is - problematic for code that may be shared with others. - - Instead, consider using environment variables or a configuration file to manage - your credentials. One popular way to manage credentials is to use a `.env` file - to store your credentials, and then use the `python-dotenv` package to load them - into your environment. - - ```shell - pip install python-dotenv - ``` - - ```shell - # .env - ANTHROPIC_API_KEY=... - ``` - - ```python - from chatlas import ChatAnthropic - from dotenv import load_dotenv - - load_dotenv() - chat = ChatAnthropic() - chat.console() - ``` - - Another, more general, solution is to load your environment variables into the shell - before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): - - ```shell - export ANTHROPIC_API_KEY=... - ``` - - Caching - ------- - - Caching with Claude is a bit more complicated than other providers but we - believe that on average it will save you both money and time, so we have - enabled it by default. With other providers, like OpenAI and Google, - you only pay for cache reads, which cost 10% of the normal price. With - Claude, you also pay for cache writes, which cost 125% of the normal price - for 5 minute caching and 200% of the normal price for 1 hour caching. - - How does this affect the total cost of a conversation? Imagine the first - turn sends 1000 input tokens and receives 200 output tokens. The second - turn must first send both the input and output from the previous turn - (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens - back. - - To compare the prices of these two approaches we can ignore the cost of - output tokens, because they are the same for both. How much will the input - tokens cost? If we don't use caching, we send 1000 tokens in the first turn - and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 - tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 - tokens in the first turn. In the second turn, 1000 of the input tokens will - be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 - tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, - decreasing the overall cost. - - Obviously, the details will vary from conversation to conversation, but - if you have a large system prompt that you re-use many times you should - expect to see larger savings. You can see exactly how many input and - cache input tokens each turn uses, along with the total cost, - with `chat.get_tokens()`. If you don't see savings for your use case, you can - suppress caching with `cache="none"`. - - Note: Claude will only cache longer prompts, with caching requiring at least - 1024-4096 tokens, depending on the model. So don't be surprised if you - don't see any differences with caching if you have a short prompt. - - See all the details at - . + Chat with an Anthropic Claude model. + + [Anthropic](https://www.anthropic.com) provides a number of chat based + models under the [Claude](https://www.anthropic.com/claude) moniker. + + Prerequisites + ------------- + + ::: {.callout-note} + ## API key + + Note that a Claude Pro membership does not give you the ability to call + models via the API. 
You will need to go to the [developer + console](https://console.anthropic.com/account/keys) to sign up (and pay + for) a developer account that will give you an API key that you can use with + this package. + ::: + + ::: {.callout-note} + ## Python requirements + + `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. + ::: + + Examples + -------- + + ```python + import os + from chatlas import ChatAnthropic + + chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + chat.chat("What is the capital of France?") + ``` + + Parameters + ---------- + system_prompt + A system prompt to set the behavior of the assistant. + model + The model to use for the chat. The default, None, will pick a reasonable + default, and warn you about it. We strongly recommend explicitly + choosing a model for all but the most casual use. + max_tokens + Maximum number of tokens to generate before stopping. + cache + How long to cache inputs? Defaults to "5m" (five minutes). + Set to "none" to disable caching or "1h" to cache for one hour. + See the Caching section for details. + reasoning + Determines how many tokens Claude can be allocated to reasoning. Must be + ≥1024 and less than `max_tokens`. Larger budgets can enable more + thorough analysis for complex problems, improving response quality. See + [extended + thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking) + for details. + api_key + The API key to use for authentication. You generally should not supply + this directly, but instead set the `ANTHROPIC_API_KEY` environment + variable. + kwargs + Additional arguments to pass to the `anthropic.Anthropic()` client + constructor. + + Returns + ------- + Chat + A Chat object. + + Note + ---- + Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) + is the simplest way to get started, and is fine for interactive use, but is + problematic for code that may be shared with others. + + Instead, consider using environment variables or a configuration file to manage + your credentials. One popular way to manage credentials is to use a `.env` file + to store your credentials, and then use the `python-dotenv` package to load them + into your environment. + + ```shell + pip install python-dotenv + ``` + + ```shell + # .env + ANTHROPIC_API_KEY=... + ``` + + ```python + from chatlas import ChatAnthropic + from dotenv import load_dotenv + + load_dotenv() + chat = ChatAnthropic() + chat.console() + ``` + + Another, more general, solution is to load your environment variables into the shell + before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): + + ```shell + export ANTHROPIC_API_KEY=... + ``` + + Caching + ------- + + Caching with Claude is a bit more complicated than other providers but we + believe that on average it will save you both money and time, so we have + enabled it by default. With other providers, like OpenAI and Google, + you only pay for cache reads, which cost 10% of the normal price. With + Claude, you also pay for cache writes, which cost 125% of the normal price + for 5 minute caching and 200% of the normal price for 1 hour caching. + + How does this affect the total cost of a conversation? Imagine the first + turn sends 1000 input tokens and receives 200 output tokens. The second + turn must first send both the input and output from the previous turn + (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens + back. 
+ + To compare the prices of these two approaches we can ignore the cost of + output tokens, because they are the same for both. How much will the input + tokens cost? If we don't use caching, we send 1000 tokens in the first turn + and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 + tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 + tokens in the first turn. In the second turn, 1000 of the input tokens will + be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 + tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, + decreasing the overall cost. + + Obviously, the details will vary from conversation to conversation, but + if you have a large system prompt that you re-use many times you should + expect to see larger savings. You can see exactly how many input and + cache input tokens each turn uses, along with the total cost, + with `chat.get_tokens()`. If you don't see savings for your use case, you can + suppress caching with `cache="none"`. + + Note: Claude will only cache longer prompts, with caching requiring at least + 1024-4096 tokens, depending on the model. So don't be surprised if you + don't see any differences with caching if you have a short prompt. + + See all the details at + . """ if model is None: model = log_model_default("claude-sonnet-4-0") + kwargs_chat: "SubmitInputArgs" = {} + if reasoning is not None: + if isinstance(reasoning, int): + reasoning = {"type": "enabled", "budget_tokens": reasoning} + kwargs_chat = {"thinking": reasoning} + return Chat( provider=AnthropicProvider( api_key=api_key, @@ -229,6 +248,7 @@ def ChatAnthropic( kwargs=kwargs, ), system_prompt=system_prompt, + kwargs_chat=kwargs_chat, ) @@ -451,6 +471,12 @@ def stream_merge_chunks(self, completion, chunk): if not isinstance(this_content.input, str): this_content.input = "" # type: ignore this_content.input += json_delta # type: ignore + elif chunk.delta.type == "thinking_delta": + this_content = cast("ThinkingBlock", this_content) + this_content.thinking += chunk.delta.thinking + elif chunk.delta.type == "signature_delta": + this_content = cast("ThinkingBlock", this_content) + this_content.signature += chunk.delta.signature elif chunk.type == "content_block_stop": this_content = completion.content[chunk.index] if this_content.type == "tool_use" and isinstance(this_content.input, str): @@ -656,6 +682,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam": res["content"] = content.get_model_value() # type: ignore return res + elif isinstance(content, ContentThinking): + extra = content.extra or {} + return { + "type": "thinking", + "thinking": content.thinking, + "signature": extra.get("signature", ""), + } raise ValueError(f"Unknown content type: {type(content)}") @@ -709,6 +742,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn: arguments=content.input, ) ) + elif content.type == "thinking": + contents.append( + ContentThinking( + thinking=content.thinking, + extra={"signature": content.signature}, + ) + ) return Turn( "assistant", diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index eea999ee..ac0936d9 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -34,6 +34,7 @@ GenerateContentResponseDict, Part, PartDict, + ThinkingConfigDict, ) from .types.google import ChatClientArgs, SubmitInputArgs @@ -45,6 +46,7 @@ def ChatGoogle( *, system_prompt: Optional[str] = None, model: Optional[str] = None, + reasoning: Optional["int | 
ThinkingConfigDict"] = None, api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", GenerateContentResponse]: @@ -86,6 +88,10 @@ def ChatGoogle( The model to use for the chat. The default, None, will pick a reasonable default, and warn you about it. We strongly recommend explicitly choosing a model for all but the most casual use. + reasoning + If provided, enables reasoning (a.k.a. "thoughts") in the model's + responses. This can be an integer number of tokens to use for reasoning, + or a full `ThinkingConfigDict` to customize the reasoning behavior. api_key The API key to use for authentication. You generally should not supply this directly, but instead set the `GOOGLE_API_KEY` environment variable. @@ -137,14 +143,20 @@ def ChatGoogle( if model is None: model = log_model_default("gemini-2.5-flash") + kwargs_chat: "SubmitInputArgs" = {} + if reasoning is not None: + if isinstance(reasoning, int): + reasoning = {"thinking_budget": reasoning, "include_thoughts": True} + kwargs_chat["config"] = {"thinking_config": reasoning} + return Chat( provider=GoogleProvider( model=model, api_key=api_key, - name="Google/Gemini", kwargs=kwargs, ), system_prompt=system_prompt, + kwargs_chat=kwargs_chat, ) @@ -367,7 +379,7 @@ def value_tokens(self, completion): cached = usage.cached_content_token_count or 0 return ( (usage.prompt_token_count or 0) - cached, - usage.candidates_token_count or 0, + (usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0), usage.cached_content_token_count or 0, ) diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py index 255aa6bd..2e84e980 100644 --- a/chatlas/_provider_openai.py +++ b/chatlas/_provider_openai.py @@ -35,6 +35,8 @@ ) from openai.types.responses.easy_input_message_param import EasyInputMessageParam from openai.types.responses.tool_param import ToolParam + from openai.types.shared.reasoning_effort import ReasoningEffort + from openai.types.shared_params.reasoning import Reasoning from openai.types.shared_params.responses_model import ResponsesModel from .types.openai import ChatClientArgs @@ -47,8 +49,9 @@ def ChatOpenAI( *, system_prompt: Optional[str] = None, model: "Optional[ResponsesModel | str]" = None, - api_key: Optional[str] = None, base_url: str = "https://api.openai.com/v1", + reasoning: "Optional[ReasoningEffort | Reasoning]" = None, + api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Response]: """ @@ -87,12 +90,15 @@ def ChatOpenAI( The model to use for the chat. The default, None, will pick a reasonable default, and warn you about it. We strongly recommend explicitly choosing a model for all but the most casual use. + base_url + The base URL to the endpoint; the default uses OpenAI. + reasoning + The reasoning effort to use (for reasoning-capable models like the o and + gpt-5 series). api_key The API key to use for authentication. You generally should not supply this directly, but instead set the `OPENAI_API_KEY` environment variable. - base_url - The base URL to the endpoint; the default uses OpenAI. kwargs Additional arguments to pass to the `openai.OpenAI()` client constructor. 
@@ -146,6 +152,14 @@ def ChatOpenAI(
     if model is None:
         model = log_model_default("gpt-4.1")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if not is_reasoning_model(model):
+            warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
+        if isinstance(reasoning, str):
+            reasoning = {"effort": reasoning, "summary": "auto"}
+        kwargs_chat = {"reasoning": reasoning}
+
     return Chat(
         provider=OpenAIProvider(
             api_key=api_key,
@@ -154,6 +168,7 @@
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
@@ -239,7 +254,7 @@ def _chat_perform_args(
         # Request reasoning content for reasoning models
         include = []
-        if self._is_reasoning(self.model):
+        if is_reasoning_model(self.model):
             include.append("reasoning.encrypted_content")
 
         if "log_probs" in kwargs_full:
@@ -254,7 +269,14 @@ def _chat_perform_args(
 
     def stream_text(self, chunk):
         if chunk.type == "response.output_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
+            return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
             return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.done":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
+            return "\n\n"
         return None
 
     def stream_merge_chunks(self, completion, chunk):
@@ -337,11 +359,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> Turn:
             completion=completion,
         )
 
-    @staticmethod
-    def _is_reasoning(model: str) -> bool:
-        # https://platform.openai.com/docs/models/compare
-        return model.startswith("o") or model.startswith("gpt-5")
-
     @staticmethod
     def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
         res: "list[ResponseInputItemParam]" = []
@@ -456,3 +473,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
 
 def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
     return {"role": role, "content": [x]}
+
+
+def is_reasoning_model(model: str) -> bool:
+    # https://platform.openai.com/docs/models/compare
+    return model.startswith("o") or model.startswith("gpt-5")

From f461b6f6cf3564e8cbc39c77b8a3d92d44f4cb93 Mon Sep 17 00:00:00 2001
From: Carson
Date: Wed, 5 Nov 2025 19:00:02 -0600
Subject: [PATCH 2/4] Update changelog

---
 CHANGELOG.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f61be0e..8276d3d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,18 +12,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### New features
 
 * Added new family of functions (`parallel_chat()`, `parallel_chat_text()`, and `parallel_chat_structured()`) for submitting multiple prompts at once with some basic rate limiting toggles. (#188)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` gain access to the latest models, built-in tools, image generation, etc., as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
+* `ChatOpenAI()`, `ChatAnthropic()`, and `ChatGoogle()` gain a new `reasoning` parameter to easily opt into, and fully customize, reasoning capabilities. (#202)
+  * A new `ContentThinking` content type was added and captures the "thinking" portion of a reasoning model's response. (#192)
+* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain a new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 * Added support for systematic evaluation via [Inspect AI](https://inspect.aisi.org.uk/). This includes:
   * A new `.export_eval()` method for exporting conversation history as an Inspect eval dataset sample. This supports multi-turn conversations, tool calls, images, PDFs, and structured data.
   * A new `.to_solver()` method for translating chat instances into Inspect solvers that can be used with Inspect's evaluation framework.
   * A new `Turn.to_inspect_messages()` method for converting turns to Inspect's message format.
   * Comprehensive documentation in the [Evals guide](https://posit-dev.github.io/chatlas/misc/evals.html).
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) gain access to latest models, built-in tools, etc. as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
-* Added rudimentary support for a new `ContentThinking` type. (#192)
-* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 
 ### Changes
 
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) move from OpenAI's Completions API to [Responses API](https://platform.openai.com/docs/api-reference/responses). If this happens to break behavior, change `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` move from OpenAI's Completions API to the [Responses API](https://platform.openai.com/docs/api-reference/responses). If this happens to break behavior, change `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
 * The `.set_model_params()` method no longer accepts `kwargs`. Instead, use the new `chat.kwargs_chat` attribute to set chat input parameters that persist across the chat session. (#212)
 * `Provider` implementations now require an additional `.value_tokens()` method. Previously, it was assumed that token info was logged and attached to the `Turn` as part of the `.value_turn()` method. The logging and attaching is now handled automatically.
(#194) From f7dc812edad0e73ec9a32b6db5fd963197ca0412 Mon Sep 17 00:00:00 2001 From: Carson Date: Wed, 5 Nov 2025 19:06:37 -0600 Subject: [PATCH 3/4] Have ChatAnthropic() yield thinking deltas --- chatlas/_provider_anthropic.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index 09612c50..b230fe24 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -445,8 +445,11 @@ def _structured_tool_call(**kwargs: Any): return kwargs_full def stream_text(self, chunk) -> Optional[str]: - if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": - return chunk.delta.text + if chunk.type == "content_block_delta": + if chunk.delta.type == "text_delta": + return chunk.delta.text + if chunk.delta.type == "thinking_delta": + return chunk.delta.thinking return None def stream_merge_chunks(self, completion, chunk): From a6b7282ed1bafa655ce79ab8232bf235ffd3f065 Mon Sep 17 00:00:00 2001 From: Carson Date: Fri, 7 Nov 2025 15:41:38 -0600 Subject: [PATCH 4/4] Fix docstring --- chatlas/_provider_anthropic.py | 288 ++++++++++++++++----------------- 1 file changed, 144 insertions(+), 144 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index b230fe24..0d70393c 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -84,150 +84,150 @@ def ChatAnthropic( kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Message]: """ - Chat with an Anthropic Claude model. - - [Anthropic](https://www.anthropic.com) provides a number of chat based - models under the [Claude](https://www.anthropic.com/claude) moniker. - - Prerequisites - ------------- - - ::: {.callout-note} - ## API key - - Note that a Claude Pro membership does not give you the ability to call - models via the API. You will need to go to the [developer - console](https://console.anthropic.com/account/keys) to sign up (and pay - for) a developer account that will give you an API key that you can use with - this package. - ::: - - ::: {.callout-note} - ## Python requirements - - `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. - ::: - - Examples - -------- - - ```python - import os - from chatlas import ChatAnthropic - - chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) - chat.chat("What is the capital of France?") - ``` - - Parameters - ---------- - system_prompt - A system prompt to set the behavior of the assistant. - model - The model to use for the chat. The default, None, will pick a reasonable - default, and warn you about it. We strongly recommend explicitly - choosing a model for all but the most casual use. - max_tokens - Maximum number of tokens to generate before stopping. - cache - How long to cache inputs? Defaults to "5m" (five minutes). - Set to "none" to disable caching or "1h" to cache for one hour. - See the Caching section for details. - reasoning - Determines how many tokens Claude can be allocated to reasoning. Must be - ≥1024 and less than `max_tokens`. Larger budgets can enable more - thorough analysis for complex problems, improving response quality. See - [extended - thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking) - for details. - api_key - The API key to use for authentication. You generally should not supply - this directly, but instead set the `ANTHROPIC_API_KEY` environment - variable. 
- kwargs - Additional arguments to pass to the `anthropic.Anthropic()` client - constructor. - - Returns - ------- - Chat - A Chat object. - - Note - ---- - Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) - is the simplest way to get started, and is fine for interactive use, but is - problematic for code that may be shared with others. - - Instead, consider using environment variables or a configuration file to manage - your credentials. One popular way to manage credentials is to use a `.env` file - to store your credentials, and then use the `python-dotenv` package to load them - into your environment. - - ```shell - pip install python-dotenv - ``` - - ```shell - # .env - ANTHROPIC_API_KEY=... - ``` - - ```python - from chatlas import ChatAnthropic - from dotenv import load_dotenv - - load_dotenv() - chat = ChatAnthropic() - chat.console() - ``` - - Another, more general, solution is to load your environment variables into the shell - before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): - - ```shell - export ANTHROPIC_API_KEY=... - ``` - - Caching - ------- - - Caching with Claude is a bit more complicated than other providers but we - believe that on average it will save you both money and time, so we have - enabled it by default. With other providers, like OpenAI and Google, - you only pay for cache reads, which cost 10% of the normal price. With - Claude, you also pay for cache writes, which cost 125% of the normal price - for 5 minute caching and 200% of the normal price for 1 hour caching. - - How does this affect the total cost of a conversation? Imagine the first - turn sends 1000 input tokens and receives 200 output tokens. The second - turn must first send both the input and output from the previous turn - (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens - back. - - To compare the prices of these two approaches we can ignore the cost of - output tokens, because they are the same for both. How much will the input - tokens cost? If we don't use caching, we send 1000 tokens in the first turn - and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 - tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 - tokens in the first turn. In the second turn, 1000 of the input tokens will - be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 - tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, - decreasing the overall cost. - - Obviously, the details will vary from conversation to conversation, but - if you have a large system prompt that you re-use many times you should - expect to see larger savings. You can see exactly how many input and - cache input tokens each turn uses, along with the total cost, - with `chat.get_tokens()`. If you don't see savings for your use case, you can - suppress caching with `cache="none"`. - - Note: Claude will only cache longer prompts, with caching requiring at least - 1024-4096 tokens, depending on the model. So don't be surprised if you - don't see any differences with caching if you have a short prompt. - - See all the details at - . + Chat with an Anthropic Claude model. + + [Anthropic](https://www.anthropic.com) provides a number of chat based + models under the [Claude](https://www.anthropic.com/claude) moniker. + + Prerequisites + ------------- + + ::: {.callout-note} + ## API key + + Note that a Claude Pro membership does not give you the ability to call + models via the API. 
You will need to go to the [developer
+    console](https://console.anthropic.com/account/keys) to sign up (and pay
+    for) a developer account that will give you an API key that you can use with
+    this package.
+    :::
+
+    ::: {.callout-note}
+    ## Python requirements
+
+    `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
+    :::
+
+    Examples
+    --------
+
+    ```python
+    import os
+    from chatlas import ChatAnthropic
+
+    chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+    chat.chat("What is the capital of France?")
+    ```
+
+    Parameters
+    ----------
+    system_prompt
+        A system prompt to set the behavior of the assistant.
+    model
+        The model to use for the chat. The default, None, will pick a reasonable
+        default, and warn you about it. We strongly recommend explicitly
+        choosing a model for all but the most casual use.
+    max_tokens
+        Maximum number of tokens to generate before stopping.
+    cache
+        How long to cache inputs? Defaults to "5m" (five minutes).
+        Set to "none" to disable caching or "1h" to cache for one hour.
+        See the Caching section for details.
+    reasoning
+        Determines how many tokens Claude can allocate to reasoning. Must be
+        ≥1024 and less than `max_tokens`. Larger budgets can enable more
+        thorough analysis for complex problems, improving response quality. See
+        [extended
+        thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
+        for details.
+    api_key
+        The API key to use for authentication. You generally should not supply
+        this directly, but instead set the `ANTHROPIC_API_KEY` environment
+        variable.
+    kwargs
+        Additional arguments to pass to the `anthropic.Anthropic()` client
+        constructor.
+
+    Returns
+    -------
+    Chat
+        A Chat object.
+
+    Note
+    ----
+    Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
+    is the simplest way to get started, and is fine for interactive use, but is
+    problematic for code that may be shared with others.
+
+    Instead, consider using environment variables or a configuration file to manage
+    your credentials. One popular way to manage credentials is to use a `.env` file
+    to store your credentials, and then use the `python-dotenv` package to load them
+    into your environment.
+
+    ```shell
+    pip install python-dotenv
+    ```
+
+    ```shell
+    # .env
+    ANTHROPIC_API_KEY=...
+    ```
+
+    ```python
+    from chatlas import ChatAnthropic
+    from dotenv import load_dotenv
+
+    load_dotenv()
+    chat = ChatAnthropic()
+    chat.console()
+    ```
+
+    Another, more general, solution is to load your environment variables into the shell
+    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
+
+    ```shell
+    export ANTHROPIC_API_KEY=...
+    ```
+
+    Caching
+    -------
+
+    Caching with Claude is a bit more complicated than with other providers, but
+    we believe that on average it will save you both money and time, so we have
+    enabled it by default. With other providers, like OpenAI and Google,
+    you only pay for cache reads, which cost 10% of the normal price. With
+    Claude, you also pay for cache writes, which cost 125% of the normal price
+    for 5-minute caching and 200% of the normal price for 1-hour caching.
+
+    How does this affect the total cost of a conversation? Imagine the first
+    turn sends 1000 input tokens and receives 200 output tokens. The second
+    turn must first send both the input and output from the previous turn
+    (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
+    back.
+ + To compare the prices of these two approaches we can ignore the cost of + output tokens, because they are the same for both. How much will the input + tokens cost? If we don't use caching, we send 1000 tokens in the first turn + and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 + tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 + tokens in the first turn. In the second turn, 1000 of the input tokens will + be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 + tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, + decreasing the overall cost. + + Obviously, the details will vary from conversation to conversation, but + if you have a large system prompt that you re-use many times you should + expect to see larger savings. You can see exactly how many input and + cache input tokens each turn uses, along with the total cost, + with `chat.get_tokens()`. If you don't see savings for your use case, you can + suppress caching with `cache="none"`. + + Note: Claude will only cache longer prompts, with caching requiring at least + 1024-4096 tokens, depending on the model. So don't be surprised if you + don't see any differences with caching if you have a short prompt. + + See all the details at + . """ if model is None:
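Taken together, the three provider diffs above give `reasoning` the same shape everywhere: pass a simple value and the constructor expands it into the provider-native config, or pass the full config dict yourself. Here is a minimal usage sketch based on the expansions visible in the diffs; the model names are illustrative only, and API keys are assumed to come from environment variables:

```python
from chatlas import ChatAnthropic, ChatGoogle, ChatOpenAI

# OpenAI: a ReasoningEffort string is expanded to
# {"effort": "medium", "summary": "auto"} and sent as `reasoning`.
chat_openai = ChatOpenAI(model="gpt-5", reasoning="medium")

# Anthropic: an int budget (>=1024 and < max_tokens) is expanded to
# {"type": "enabled", "budget_tokens": 2048} and sent as `thinking`.
chat_claude = ChatAnthropic(model="claude-sonnet-4-0", reasoning=2048)

# Google: an int budget is expanded to
# {"thinking_budget": 1024, "include_thoughts": True} and sent as the
# `thinking_config` entry of the request `config`.
chat_gemini = ChatGoogle(model="gemini-2.5-flash", reasoning=1024)

chat_claude.chat("How many r's are in 'strawberry'?")
```

One asymmetry worth noting: only `ChatOpenAI()` warns when the chosen model is not reasoning-capable (via the new `is_reasoning_model()` helper); the Anthropic and Google constructors forward the config as-is and let the API reject an invalid budget.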
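Once a reply arrives, thinking blocks are ordinary content on the assistant turn: the Anthropic `_as_turn()` change stores each block as `ContentThinking(thinking=..., extra={"signature": ...})`, and `_as_content_block()` echoes the signature back on the next request. Below is a sketch of inspecting that content after a chat; the `chatlas._content` import path and the budget value are assumptions for illustration, not part of the diff:

```python
from chatlas import ChatAnthropic
from chatlas._content import ContentThinking  # internal module; path assumed

chat = ChatAnthropic(model="claude-sonnet-4-0", reasoning=2048)
chat.chat("Sketch a three-step argument for the triangle inequality.")

# The last assistant turn holds the ContentThinking blocks captured above.
turn = chat.get_last_turn()
for content in turn.contents:
    if isinstance(content, ContentThinking):
        print(content.thinking)  # the model's reasoning text
        # Opaque signature that _as_content_block() must replay so the
        # thinking block stays valid on the following turn.
        print((content.extra or {}).get("signature"))
```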
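Finally, the cache-pricing walkthrough in the `ChatAnthropic()` docstring checks out numerically. The sketch below counts only input-token cost (output cost is identical in both scenarios) and uses the docstring's multipliers: 1.25x for a 5-minute cache write and 0.1x for a cache read:

```python
# Without caching: 1000 input tokens in turn 1, then the previous input and
# output (1000 + 200) plus 1000 fresh tokens in turn 2.
no_cache = 1000 + (1000 + 200 + 1000)  # 3200 token-equivalents

# With caching: turn 1 writes 1000 tokens to the cache (1.25x); turn 2 reads
# those 1000 back (0.1x) and writes the new 1200 (1.25x).
with_cache = 1000 * 1.25 + (1000 * 0.1 + (200 + 1000) * 1.25)  # 2850

print(f"{1 - with_cache / no_cache:.1%} fewer token-equivalents")  # 10.9%
```

which agrees with the roughly 11% saving the docstring claims.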