From 3832a34e94666e38a6e4eb4be5b5bc7e1cb55b91 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 4 Nov 2025 12:19:05 -0600 Subject: [PATCH 1/4] feat: better support for reasoning/thinking --- chatlas/_provider_anthropic.py | 316 +++++++++++++++++++-------------- chatlas/_provider_google.py | 16 +- chatlas/_provider_openai.py | 40 ++++- 3 files changed, 223 insertions(+), 149 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index fb5eb1a6..09612c50 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -17,6 +17,7 @@ ContentJson, ContentPDF, ContentText, + ContentThinking, ContentToolRequest, ContentToolResult, ContentToolResultImage, @@ -41,6 +42,8 @@ MessageParam, RawMessageStreamEvent, TextBlock, + ThinkingBlock, + ThinkingBlockParam, ToolParam, ToolUseBlock, ) @@ -51,6 +54,7 @@ from anthropic.types.messages.batch_create_params import Request as BatchRequest from anthropic.types.model_param import ModelParam from anthropic.types.text_block_param import TextBlockParam + from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam from anthropic.types.tool_result_block_param import ToolResultBlockParam from anthropic.types.tool_use_block_param import ToolUseBlockParam @@ -62,6 +66,7 @@ ToolUseBlockParam, ToolResultBlockParam, DocumentBlockParam, + ThinkingBlockParam, ] else: Message = object @@ -72,154 +77,168 @@ def ChatAnthropic( *, system_prompt: Optional[str] = None, model: "Optional[ModelParam]" = None, - api_key: Optional[str] = None, max_tokens: int = 4096, cache: Literal["5m", "1h", "none"] = "5m", + reasoning: Optional["int | ThinkingConfigEnabledParam"] = None, + api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Message]: """ - Chat with an Anthropic Claude model. - - [Anthropic](https://www.anthropic.com) provides a number of chat based - models under the [Claude](https://www.anthropic.com/claude) moniker. - - Prerequisites - ------------- - - ::: {.callout-note} - ## API key - - Note that a Claude Pro membership does not give you the ability to call - models via the API. You will need to go to the [developer - console](https://console.anthropic.com/account/keys) to sign up (and pay - for) a developer account that will give you an API key that you can use with - this package. - ::: - - ::: {.callout-note} - ## Python requirements - - `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. - ::: - - Examples - -------- - - ```python - import os - from chatlas import ChatAnthropic - - chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) - chat.chat("What is the capital of France?") - ``` - - Parameters - ---------- - system_prompt - A system prompt to set the behavior of the assistant. - model - The model to use for the chat. The default, None, will pick a reasonable - default, and warn you about it. We strongly recommend explicitly - choosing a model for all but the most casual use. - api_key - The API key to use for authentication. You generally should not supply - this directly, but instead set the `ANTHROPIC_API_KEY` environment - variable. - max_tokens - Maximum number of tokens to generate before stopping. - cache - How long to cache inputs? Defaults to "5m" (five minutes). - Set to "none" to disable caching or "1h" to cache for one hour. - See the Caching section for details. - kwargs - Additional arguments to pass to the `anthropic.Anthropic()` client - constructor. 
- - Returns - ------- - Chat - A Chat object. - - Note - ---- - Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) - is the simplest way to get started, and is fine for interactive use, but is - problematic for code that may be shared with others. - - Instead, consider using environment variables or a configuration file to manage - your credentials. One popular way to manage credentials is to use a `.env` file - to store your credentials, and then use the `python-dotenv` package to load them - into your environment. - - ```shell - pip install python-dotenv - ``` - - ```shell - # .env - ANTHROPIC_API_KEY=... - ``` - - ```python - from chatlas import ChatAnthropic - from dotenv import load_dotenv - - load_dotenv() - chat = ChatAnthropic() - chat.console() - ``` - - Another, more general, solution is to load your environment variables into the shell - before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): - - ```shell - export ANTHROPIC_API_KEY=... - ``` - - Caching - ------- - - Caching with Claude is a bit more complicated than other providers but we - believe that on average it will save you both money and time, so we have - enabled it by default. With other providers, like OpenAI and Google, - you only pay for cache reads, which cost 10% of the normal price. With - Claude, you also pay for cache writes, which cost 125% of the normal price - for 5 minute caching and 200% of the normal price for 1 hour caching. - - How does this affect the total cost of a conversation? Imagine the first - turn sends 1000 input tokens and receives 200 output tokens. The second - turn must first send both the input and output from the previous turn - (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens - back. - - To compare the prices of these two approaches we can ignore the cost of - output tokens, because they are the same for both. How much will the input - tokens cost? If we don't use caching, we send 1000 tokens in the first turn - and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 - tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 - tokens in the first turn. In the second turn, 1000 of the input tokens will - be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 - tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, - decreasing the overall cost. - - Obviously, the details will vary from conversation to conversation, but - if you have a large system prompt that you re-use many times you should - expect to see larger savings. You can see exactly how many input and - cache input tokens each turn uses, along with the total cost, - with `chat.get_tokens()`. If you don't see savings for your use case, you can - suppress caching with `cache="none"`. - - Note: Claude will only cache longer prompts, with caching requiring at least - 1024-4096 tokens, depending on the model. So don't be surprised if you - don't see any differences with caching if you have a short prompt. - - See all the details at - . + Chat with an Anthropic Claude model. + + [Anthropic](https://www.anthropic.com) provides a number of chat based + models under the [Claude](https://www.anthropic.com/claude) moniker. + + Prerequisites + ------------- + + ::: {.callout-note} + ## API key + + Note that a Claude Pro membership does not give you the ability to call + models via the API. 
You will need to go to the [developer + console](https://console.anthropic.com/account/keys) to sign up (and pay + for) a developer account that will give you an API key that you can use with + this package. + ::: + + ::: {.callout-note} + ## Python requirements + + `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. + ::: + + Examples + -------- + + ```python + import os + from chatlas import ChatAnthropic + + chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + chat.chat("What is the capital of France?") + ``` + + Parameters + ---------- + system_prompt + A system prompt to set the behavior of the assistant. + model + The model to use for the chat. The default, None, will pick a reasonable + default, and warn you about it. We strongly recommend explicitly + choosing a model for all but the most casual use. + max_tokens + Maximum number of tokens to generate before stopping. + cache + How long to cache inputs? Defaults to "5m" (five minutes). + Set to "none" to disable caching or "1h" to cache for one hour. + See the Caching section for details. + reasoning + Determines how many tokens Claude can be allocated to reasoning. Must be + ≥1024 and less than `max_tokens`. Larger budgets can enable more + thorough analysis for complex problems, improving response quality. See + [extended + thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking) + for details. + api_key + The API key to use for authentication. You generally should not supply + this directly, but instead set the `ANTHROPIC_API_KEY` environment + variable. + kwargs + Additional arguments to pass to the `anthropic.Anthropic()` client + constructor. + + Returns + ------- + Chat + A Chat object. + + Note + ---- + Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) + is the simplest way to get started, and is fine for interactive use, but is + problematic for code that may be shared with others. + + Instead, consider using environment variables or a configuration file to manage + your credentials. One popular way to manage credentials is to use a `.env` file + to store your credentials, and then use the `python-dotenv` package to load them + into your environment. + + ```shell + pip install python-dotenv + ``` + + ```shell + # .env + ANTHROPIC_API_KEY=... + ``` + + ```python + from chatlas import ChatAnthropic + from dotenv import load_dotenv + + load_dotenv() + chat = ChatAnthropic() + chat.console() + ``` + + Another, more general, solution is to load your environment variables into the shell + before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): + + ```shell + export ANTHROPIC_API_KEY=... + ``` + + Caching + ------- + + Caching with Claude is a bit more complicated than other providers but we + believe that on average it will save you both money and time, so we have + enabled it by default. With other providers, like OpenAI and Google, + you only pay for cache reads, which cost 10% of the normal price. With + Claude, you also pay for cache writes, which cost 125% of the normal price + for 5 minute caching and 200% of the normal price for 1 hour caching. + + How does this affect the total cost of a conversation? Imagine the first + turn sends 1000 input tokens and receives 200 output tokens. The second + turn must first send both the input and output from the previous turn + (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens + back. 
+ + To compare the prices of these two approaches we can ignore the cost of + output tokens, because they are the same for both. How much will the input + tokens cost? If we don't use caching, we send 1000 tokens in the first turn + and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 + tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 + tokens in the first turn. In the second turn, 1000 of the input tokens will + be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 + tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, + decreasing the overall cost. + + Obviously, the details will vary from conversation to conversation, but + if you have a large system prompt that you re-use many times you should + expect to see larger savings. You can see exactly how many input and + cache input tokens each turn uses, along with the total cost, + with `chat.get_tokens()`. If you don't see savings for your use case, you can + suppress caching with `cache="none"`. + + Note: Claude will only cache longer prompts, with caching requiring at least + 1024-4096 tokens, depending on the model. So don't be surprised if you + don't see any differences with caching if you have a short prompt. + + See all the details at + . """ if model is None: model = log_model_default("claude-sonnet-4-0") + kwargs_chat: "SubmitInputArgs" = {} + if reasoning is not None: + if isinstance(reasoning, int): + reasoning = {"type": "enabled", "budget_tokens": reasoning} + kwargs_chat = {"thinking": reasoning} + return Chat( provider=AnthropicProvider( api_key=api_key, @@ -229,6 +248,7 @@ def ChatAnthropic( kwargs=kwargs, ), system_prompt=system_prompt, + kwargs_chat=kwargs_chat, ) @@ -451,6 +471,12 @@ def stream_merge_chunks(self, completion, chunk): if not isinstance(this_content.input, str): this_content.input = "" # type: ignore this_content.input += json_delta # type: ignore + elif chunk.delta.type == "thinking_delta": + this_content = cast("ThinkingBlock", this_content) + this_content.thinking += chunk.delta.thinking + elif chunk.delta.type == "signature_delta": + this_content = cast("ThinkingBlock", this_content) + this_content.signature += chunk.delta.signature elif chunk.type == "content_block_stop": this_content = completion.content[chunk.index] if this_content.type == "tool_use" and isinstance(this_content.input, str): @@ -656,6 +682,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam": res["content"] = content.get_model_value() # type: ignore return res + elif isinstance(content, ContentThinking): + extra = content.extra or {} + return { + "type": "thinking", + "thinking": content.thinking, + "signature": extra.get("signature", ""), + } raise ValueError(f"Unknown content type: {type(content)}") @@ -709,6 +742,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn: arguments=content.input, ) ) + elif content.type == "thinking": + contents.append( + ContentThinking( + thinking=content.thinking, + extra={"signature": content.signature}, + ) + ) return Turn( "assistant", diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index eea999ee..ac0936d9 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -34,6 +34,7 @@ GenerateContentResponseDict, Part, PartDict, + ThinkingConfigDict, ) from .types.google import ChatClientArgs, SubmitInputArgs @@ -45,6 +46,7 @@ def ChatGoogle( *, system_prompt: Optional[str] = None, model: Optional[str] = None, + reasoning: Optional["int | 
ThinkingConfigDict"] = None, api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", GenerateContentResponse]: @@ -86,6 +88,10 @@ def ChatGoogle( The model to use for the chat. The default, None, will pick a reasonable default, and warn you about it. We strongly recommend explicitly choosing a model for all but the most casual use. + reasoning + If provided, enables reasoning (a.k.a. "thoughts") in the model's + responses. This can be an integer number of tokens to use for reasoning, + or a full `ThinkingConfigDict` to customize the reasoning behavior. api_key The API key to use for authentication. You generally should not supply this directly, but instead set the `GOOGLE_API_KEY` environment variable. @@ -137,14 +143,20 @@ def ChatGoogle( if model is None: model = log_model_default("gemini-2.5-flash") + kwargs_chat: "SubmitInputArgs" = {} + if reasoning is not None: + if isinstance(reasoning, int): + reasoning = {"thinking_budget": reasoning, "include_thoughts": True} + kwargs_chat["config"] = {"thinking_config": reasoning} + return Chat( provider=GoogleProvider( model=model, api_key=api_key, - name="Google/Gemini", kwargs=kwargs, ), system_prompt=system_prompt, + kwargs_chat=kwargs_chat, ) @@ -367,7 +379,7 @@ def value_tokens(self, completion): cached = usage.cached_content_token_count or 0 return ( (usage.prompt_token_count or 0) - cached, - usage.candidates_token_count or 0, + (usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0), usage.cached_content_token_count or 0, ) diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py index 255aa6bd..2e84e980 100644 --- a/chatlas/_provider_openai.py +++ b/chatlas/_provider_openai.py @@ -35,6 +35,8 @@ ) from openai.types.responses.easy_input_message_param import EasyInputMessageParam from openai.types.responses.tool_param import ToolParam + from openai.types.shared.reasoning_effort import ReasoningEffort + from openai.types.shared_params.reasoning import Reasoning from openai.types.shared_params.responses_model import ResponsesModel from .types.openai import ChatClientArgs @@ -47,8 +49,9 @@ def ChatOpenAI( *, system_prompt: Optional[str] = None, model: "Optional[ResponsesModel | str]" = None, - api_key: Optional[str] = None, base_url: str = "https://api.openai.com/v1", + reasoning: "Optional[ReasoningEffort | Reasoning]" = None, + api_key: Optional[str] = None, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Response]: """ @@ -87,12 +90,15 @@ def ChatOpenAI( The model to use for the chat. The default, None, will pick a reasonable default, and warn you about it. We strongly recommend explicitly choosing a model for all but the most casual use. + base_url + The base URL to the endpoint; the default uses OpenAI. + reasoning + The reasoning effort to use (for reasoning-capable models like the o and + gpt-5 series). api_key The API key to use for authentication. You generally should not supply this directly, but instead set the `OPENAI_API_KEY` environment variable. - base_url - The base URL to the endpoint; the default uses OpenAI. kwargs Additional arguments to pass to the `openai.OpenAI()` client constructor. 
@@ -146,6 +152,14 @@ def ChatOpenAI(
     if model is None:
         model = log_model_default("gpt-4.1")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if not is_reasoning_model(model):
+            warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
+        if isinstance(reasoning, str):
+            reasoning = {"effort": reasoning, "summary": "auto"}
+        kwargs_chat = {"reasoning": reasoning}
+
     return Chat(
         provider=OpenAIProvider(
             api_key=api_key,
@@ -154,6 +168,7 @@
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
@@ -239,7 +254,7 @@ def _chat_perform_args(
         # Request reasoning content for reasoning models
         include = []
-        if self._is_reasoning(self.model):
+        if is_reasoning_model(self.model):
             include.append("reasoning.encrypted_content")
 
         if "log_probs" in kwargs_full:
@@ -254,7 +269,14 @@ def _chat_perform_args(
 
     def stream_text(self, chunk):
         if chunk.type == "response.output_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
+            return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
             return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.done":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
+            return "\n\n"
         return None
 
     def stream_merge_chunks(self, completion, chunk):
@@ -337,11 +359,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> Turn:
             completion=completion,
         )
 
-    @staticmethod
-    def _is_reasoning(model: str) -> bool:
-        # https://platform.openai.com/docs/models/compare
-        return model.startswith("o") or model.startswith("gpt-5")
-
     @staticmethod
     def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
         res: "list[ResponseInputItemParam]" = []
@@ -456,3 +473,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
 
 def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
     return {"role": role, "content": [x]}
+
+
+def is_reasoning_model(model: str) -> bool:
+    # https://platform.openai.com/docs/models/compare
+    return model.startswith("o") or model.startswith("gpt-5")

From f461b6f6cf3564e8cbc39c77b8a3d92d44f4cb93 Mon Sep 17 00:00:00 2001
From: Carson
Date: Wed, 5 Nov 2025 19:00:02 -0600
Subject: [PATCH 2/4] Update changelog

---
 CHANGELOG.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f61be0e..8276d3d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,18 +12,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### New features
 
 * Added new family of functions (`parallel_chat()`, `parallel_chat_text()`, and `parallel_chat_structured()`) for submitting multiple prompts at once with some basic rate limiting toggles. (#188)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` gain access to the latest models, built-in tools, image generation, etc., as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
+* `ChatOpenAI()`, `ChatAnthropic()`, and `ChatGoogle()` gain a new `reasoning` parameter to easily opt into, and fully customize, reasoning capabilities. (#202)
+  * A new `ContentThinking` content type was added and captures the "thinking" portion of a reasoning model's response. (#192)
+* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain a new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 * Added support for systematic evaluation via [Inspect AI](https://inspect.aisi.org.uk/). This includes:
   * A new `.export_eval()` method for exporting conversation history as an Inspect eval dataset sample. This supports multi-turn conversations, tool calls, images, PDFs, and structured data.
   * A new `.to_solver()` method for translating chat instances into Inspect solvers that can be used with Inspect's evaluation framework.
   * A new `Turn.to_inspect_messages()` method for converting turns to Inspect's message format.
   * Comprehensive documentation in the [Evals guide](https://posit-dev.github.io/chatlas/misc/evals.html).
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) gain access to latest models, built-in tools, etc. as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
-* Added rudimentary support for a new `ContentThinking` type. (#192)
-* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 
 ### Changes
 
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) move from OpenAI's Completions API to [Responses API](https://platform.openai.com/docs/api-reference/responses). If this happens to break behavior, change `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` move from OpenAI's Completions API to the [Responses API](https://platform.openai.com/docs/api-reference/responses). If this happens to break behavior, change `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
 * The `.set_model_params()` method no longer accepts `kwargs`. Instead, use the new `chat.kwargs_chat` attribute to set chat input parameters that persist across the chat session. (#212)
 * `Provider` implementations now require an additional `.value_tokens()` method. Previously, it was assumed that token info was logged and attached to the `Turn` as part of the `.value_turn()` method. The logging and attaching is now handled automatically.
(#194) From f7dc812edad0e73ec9a32b6db5fd963197ca0412 Mon Sep 17 00:00:00 2001 From: Carson Date: Wed, 5 Nov 2025 19:06:37 -0600 Subject: [PATCH 3/4] Have ChatAnthropic() yield thinking deltas --- chatlas/_provider_anthropic.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index 09612c50..b230fe24 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -445,8 +445,11 @@ def _structured_tool_call(**kwargs: Any): return kwargs_full def stream_text(self, chunk) -> Optional[str]: - if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": - return chunk.delta.text + if chunk.type == "content_block_delta": + if chunk.delta.type == "text_delta": + return chunk.delta.text + if chunk.delta.type == "thinking_delta": + return chunk.delta.thinking return None def stream_merge_chunks(self, completion, chunk): From a6b7282ed1bafa655ce79ab8232bf235ffd3f065 Mon Sep 17 00:00:00 2001 From: Carson Date: Fri, 7 Nov 2025 15:41:38 -0600 Subject: [PATCH 4/4] Fix docstring --- chatlas/_provider_anthropic.py | 288 ++++++++++++++++----------------- 1 file changed, 144 insertions(+), 144 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index b230fe24..0d70393c 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -84,150 +84,150 @@ def ChatAnthropic( kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", Message]: """ - Chat with an Anthropic Claude model. - - [Anthropic](https://www.anthropic.com) provides a number of chat based - models under the [Claude](https://www.anthropic.com/claude) moniker. - - Prerequisites - ------------- - - ::: {.callout-note} - ## API key - - Note that a Claude Pro membership does not give you the ability to call - models via the API. You will need to go to the [developer - console](https://console.anthropic.com/account/keys) to sign up (and pay - for) a developer account that will give you an API key that you can use with - this package. - ::: - - ::: {.callout-note} - ## Python requirements - - `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`. - ::: - - Examples - -------- - - ```python - import os - from chatlas import ChatAnthropic - - chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) - chat.chat("What is the capital of France?") - ``` - - Parameters - ---------- - system_prompt - A system prompt to set the behavior of the assistant. - model - The model to use for the chat. The default, None, will pick a reasonable - default, and warn you about it. We strongly recommend explicitly - choosing a model for all but the most casual use. - max_tokens - Maximum number of tokens to generate before stopping. - cache - How long to cache inputs? Defaults to "5m" (five minutes). - Set to "none" to disable caching or "1h" to cache for one hour. - See the Caching section for details. - reasoning - Determines how many tokens Claude can be allocated to reasoning. Must be - ≥1024 and less than `max_tokens`. Larger budgets can enable more - thorough analysis for complex problems, improving response quality. See - [extended - thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking) - for details. - api_key - The API key to use for authentication. You generally should not supply - this directly, but instead set the `ANTHROPIC_API_KEY` environment - variable. 
- kwargs - Additional arguments to pass to the `anthropic.Anthropic()` client - constructor. - - Returns - ------- - Chat - A Chat object. - - Note - ---- - Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`) - is the simplest way to get started, and is fine for interactive use, but is - problematic for code that may be shared with others. - - Instead, consider using environment variables or a configuration file to manage - your credentials. One popular way to manage credentials is to use a `.env` file - to store your credentials, and then use the `python-dotenv` package to load them - into your environment. - - ```shell - pip install python-dotenv - ``` - - ```shell - # .env - ANTHROPIC_API_KEY=... - ``` - - ```python - from chatlas import ChatAnthropic - from dotenv import load_dotenv - - load_dotenv() - chat = ChatAnthropic() - chat.console() - ``` - - Another, more general, solution is to load your environment variables into the shell - before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): - - ```shell - export ANTHROPIC_API_KEY=... - ``` - - Caching - ------- - - Caching with Claude is a bit more complicated than other providers but we - believe that on average it will save you both money and time, so we have - enabled it by default. With other providers, like OpenAI and Google, - you only pay for cache reads, which cost 10% of the normal price. With - Claude, you also pay for cache writes, which cost 125% of the normal price - for 5 minute caching and 200% of the normal price for 1 hour caching. - - How does this affect the total cost of a conversation? Imagine the first - turn sends 1000 input tokens and receives 200 output tokens. The second - turn must first send both the input and output from the previous turn - (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens - back. - - To compare the prices of these two approaches we can ignore the cost of - output tokens, because they are the same for both. How much will the input - tokens cost? If we don't use caching, we send 1000 tokens in the first turn - and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 - tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 - tokens in the first turn. In the second turn, 1000 of the input tokens will - be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 - tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, - decreasing the overall cost. - - Obviously, the details will vary from conversation to conversation, but - if you have a large system prompt that you re-use many times you should - expect to see larger savings. You can see exactly how many input and - cache input tokens each turn uses, along with the total cost, - with `chat.get_tokens()`. If you don't see savings for your use case, you can - suppress caching with `cache="none"`. - - Note: Claude will only cache longer prompts, with caching requiring at least - 1024-4096 tokens, depending on the model. So don't be surprised if you - don't see any differences with caching if you have a short prompt. - - See all the details at - . + Chat with an Anthropic Claude model. + + [Anthropic](https://www.anthropic.com) provides a number of chat based + models under the [Claude](https://www.anthropic.com/claude) moniker. + + Prerequisites + ------------- + + ::: {.callout-note} + ## API key + + Note that a Claude Pro membership does not give you the ability to call + models via the API. 
You will need to go to the [developer
+    console](https://console.anthropic.com/account/keys) to sign up (and pay
+    for) a developer account that will give you an API key that you can use with
+    this package.
+    :::
+
+    ::: {.callout-note}
+    ## Python requirements
+
+    `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
+    :::
+
+    Examples
+    --------
+
+    ```python
+    import os
+    from chatlas import ChatAnthropic
+
+    chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+    chat.chat("What is the capital of France?")
+    ```
+
+    Parameters
+    ----------
+    system_prompt
+        A system prompt to set the behavior of the assistant.
+    model
+        The model to use for the chat. The default, None, will pick a reasonable
+        default, and warn you about it. We strongly recommend explicitly
+        choosing a model for all but the most casual use.
+    max_tokens
+        Maximum number of tokens to generate before stopping.
+    cache
+        How long to cache inputs? Defaults to "5m" (five minutes).
+        Set to "none" to disable caching or "1h" to cache for one hour.
+        See the Caching section for details.
+    reasoning
+        Determines how many tokens Claude can allocate to reasoning. Must be
+        ≥1024 and less than `max_tokens`. Larger budgets can enable more
+        thorough analysis for complex problems, improving response quality. See
+        [extended
+        thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
+        for details.
+    api_key
+        The API key to use for authentication. You generally should not supply
+        this directly, but instead set the `ANTHROPIC_API_KEY` environment
+        variable.
+    kwargs
+        Additional arguments to pass to the `anthropic.Anthropic()` client
+        constructor.
+
+    Returns
+    -------
+    Chat
+        A Chat object.
+
+    Note
+    ----
+    Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
+    is the simplest way to get started, and is fine for interactive use, but is
+    problematic for code that may be shared with others.
+
+    Instead, consider using environment variables or a configuration file to manage
+    your credentials. One popular way to manage credentials is to use a `.env` file
+    to store your credentials, and then use the `python-dotenv` package to load them
+    into your environment.
+
+    ```shell
+    pip install python-dotenv
+    ```
+
+    ```shell
+    # .env
+    ANTHROPIC_API_KEY=...
+    ```
+
+    ```python
+    from chatlas import ChatAnthropic
+    from dotenv import load_dotenv
+
+    load_dotenv()
+    chat = ChatAnthropic()
+    chat.console()
+    ```
+
+    Another, more general, solution is to load your environment variables into the shell
+    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
+
+    ```shell
+    export ANTHROPIC_API_KEY=...
+    ```
+
+    Caching
+    -------
+
+    Caching with Claude is a bit more complicated than with other providers, but
+    we believe that on average it will save you both money and time, so we have
+    enabled it by default. With other providers, like OpenAI and Google,
+    you only pay for cache reads, which cost 10% of the normal price. With
+    Claude, you also pay for cache writes, which cost 125% of the normal price
+    for 5-minute caching and 200% of the normal price for 1-hour caching.
+
+    How does this affect the total cost of a conversation? Imagine the first
+    turn sends 1000 input tokens and receives 200 output tokens. The second
+    turn must first send both the input and output from the previous turn
+    (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
+    back.
+ + To compare the prices of these two approaches we can ignore the cost of + output tokens, because they are the same for both. How much will the input + tokens cost? If we don't use caching, we send 1000 tokens in the first turn + and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200 + tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250 + tokens in the first turn. In the second turn, 1000 of the input tokens will + be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600 + tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens, + decreasing the overall cost. + + Obviously, the details will vary from conversation to conversation, but + if you have a large system prompt that you re-use many times you should + expect to see larger savings. You can see exactly how many input and + cache input tokens each turn uses, along with the total cost, + with `chat.get_tokens()`. If you don't see savings for your use case, you can + suppress caching with `cache="none"`. + + Note: Claude will only cache longer prompts, with caching requiring at least + 1024-4096 tokens, depending on the model. So don't be surprised if you + don't see any differences with caching if you have a short prompt. + + See all the details at + . """ if model is None:
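Taken together, the three provider diffs above give `reasoning` the same shape everywhere: pass a simple value and the constructor expands it into the provider-native config, or pass the full config dict yourself. Here is a minimal usage sketch based on the expansions visible in the diffs; the model names are illustrative only, and API keys are assumed to come from environment variables:

```python
from chatlas import ChatAnthropic, ChatGoogle, ChatOpenAI

# OpenAI: a ReasoningEffort string is expanded to
# {"effort": "medium", "summary": "auto"} and sent as `reasoning`.
chat_openai = ChatOpenAI(model="gpt-5", reasoning="medium")

# Anthropic: an int budget (>=1024 and < max_tokens) is expanded to
# {"type": "enabled", "budget_tokens": 2048} and sent as `thinking`.
chat_claude = ChatAnthropic(model="claude-sonnet-4-0", reasoning=2048)

# Google: an int budget is expanded to
# {"thinking_budget": 1024, "include_thoughts": True} and sent as the
# `thinking_config` entry of the request `config`.
chat_gemini = ChatGoogle(model="gemini-2.5-flash", reasoning=1024)

chat_claude.chat("How many r's are in 'strawberry'?")
```

One asymmetry worth noting: only `ChatOpenAI()` warns when the chosen model is not reasoning-capable (via the new `is_reasoning_model()` helper); the Anthropic and Google constructors forward the config as-is and let the API reject an invalid budget.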
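Once a reply arrives, thinking blocks are ordinary content on the assistant turn: the Anthropic `_as_turn()` change stores each block as `ContentThinking(thinking=..., extra={"signature": ...})`, and `_as_content_block()` echoes the signature back on the next request. Below is a sketch of inspecting that content after a chat; the `chatlas._content` import path and the budget value are assumptions for illustration, not part of the diff:

```python
from chatlas import ChatAnthropic
from chatlas._content import ContentThinking  # internal module; path assumed

chat = ChatAnthropic(model="claude-sonnet-4-0", reasoning=2048)
chat.chat("Sketch a three-step argument for the triangle inequality.")

# The last assistant turn holds the ContentThinking blocks captured above.
turn = chat.get_last_turn()
for content in turn.contents:
    if isinstance(content, ContentThinking):
        print(content.thinking)  # the model's reasoning text
        # Opaque signature that _as_content_block() must replay so the
        # thinking block stays valid on the following turn.
        print((content.extra or {}).get("signature"))
```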
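Finally, the cache-pricing walkthrough in the `ChatAnthropic()` docstring checks out numerically. The sketch below counts only input-token cost (output cost is identical in both scenarios) and uses the docstring's multipliers: 1.25x for a 5-minute cache write and 0.1x for a cache read:

```python
# Without caching: 1000 input tokens in turn 1, then the previous input and
# output (1000 + 200) plus 1000 fresh tokens in turn 2.
no_cache = 1000 + (1000 + 200 + 1000)  # 3200 token-equivalents

# With caching: turn 1 writes 1000 tokens to the cache (1.25x); turn 2 reads
# those 1000 back (0.1x) and writes the new 1200 (1.25x).
with_cache = 1000 * 1.25 + (1000 * 0.1 + (200 + 1000) * 1.25)  # 2850

print(f"{1 - with_cache / no_cache:.1%} fewer token-equivalents")  # 10.9%
```

which agrees with the roughly 11% saving the docstring claims.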