posit-dev
diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py
Lines changed: 178 additions & 138 deletions
Lines changed: 178 additions & 138 deletions
@@ -17,6 +17,7 @@
     ContentJson,
     ContentPDF,
     ContentText,
+    ContentThinking,
     ContentToolRequest,
     ContentToolResult,
     ContentToolResultImage,
@@ -41,6 +42,8 @@
         MessageParam,
         RawMessageStreamEvent,
         TextBlock,
+        ThinkingBlock,
+        ThinkingBlockParam,
         ToolParam,
         ToolUseBlock,
     )
@@ -51,6 +54,7 @@
     from anthropic.types.messages.batch_create_params import Request as BatchRequest
     from anthropic.types.model_param import ModelParam
     from anthropic.types.text_block_param import TextBlockParam
+    from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
     from anthropic.types.tool_result_block_param import ToolResultBlockParam
     from anthropic.types.tool_use_block_param import ToolUseBlockParam
 
@@ -62,6 +66,7 @@
         ToolUseBlockParam,
         ToolResultBlockParam,
         DocumentBlockParam,
+        ThinkingBlockParam,
     ]
 else:
     Message = object
@@ -72,154 +77,168 @@ def ChatAnthropic(
     *,
     system_prompt: Optional[str] = None,
     model: "Optional[ModelParam]" = None,
-    api_key: Optional[str] = None,
     max_tokens: int = 4096,
     cache: Literal["5m", "1h", "none"] = "5m",
+    reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
+    api_key: Optional[str] = None,
     kwargs: Optional["ChatClientArgs"] = None,
 ) -> Chat["SubmitInputArgs", Message]:
     """
-    Chat with an Anthropic Claude model.
-
-    [Anthropic](https://www.anthropic.com) provides a number of chat based
-    models under the [Claude](https://www.anthropic.com/claude) moniker.
-
-    Prerequisites
-    -------------
-
-    ::: {.callout-note}
-    ## API key
-
-    Note that a Claude Pro membership does not give you the ability to call
-    models via the API. You will need to go to the [developer
-    console](https://console.anthropic.com/account/keys) to sign up (and pay
-    for) a developer account that will give you an API key that you can use with
-    this package.
-    :::
-
-    ::: {.callout-note}
-    ## Python requirements
-
-    `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
-    :::
-
-    Examples
-    --------
-
-    ```python
-    import os
-    from chatlas import ChatAnthropic
-
-    chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-    chat.chat("What is the capital of France?")
-    ```
-
-    Parameters
-    ----------
-    system_prompt
-        A system prompt to set the behavior of the assistant.
-    model
-        The model to use for the chat. The default, None, will pick a reasonable
-        default, and warn you about it. We strongly recommend explicitly
-        choosing a model for all but the most casual use.
-    api_key
-        The API key to use for authentication. You generally should not supply
-        this directly, but instead set the `ANTHROPIC_API_KEY` environment
-        variable.
-    max_tokens
-        Maximum number of tokens to generate before stopping.
-    cache
-        How long to cache inputs? Defaults to "5m" (five minutes).
-        Set to "none" to disable caching or "1h" to cache for one hour.
-        See the Caching section for details.
-    kwargs
-        Additional arguments to pass to the `anthropic.Anthropic()` client
-        constructor.
-
-    Returns
-    -------
-    Chat
-        A Chat object.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    ANTHROPIC_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatAnthropic
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatAnthropic()
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export ANTHROPIC_API_KEY=...
-    ```
-
-    Caching
-    -------
-
-    Caching with Claude is a bit more complicated than other providers but we
-    believe that on average it will save you both money and time, so we have
-    enabled it by default. With other providers, like OpenAI and Google,
-    you only pay for cache reads, which cost 10% of the normal price. With
-    Claude, you also pay for cache writes, which cost 125% of the normal price
-    for 5 minute caching and 200% of the normal price for 1 hour caching.
-
-    How does this affect the total cost of a conversation? Imagine the first
-    turn sends 1000 input tokens and receives 200 output tokens. The second
-    turn must first send both the input and output from the previous turn
-    (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
-    back.
-
-    To compare the prices of these two approaches we can ignore the cost of
-    output tokens, because they are the same for both. How much will the input
-    tokens cost? If we don't use caching, we send 1000 tokens in the first turn
-    and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200
-    tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
-    tokens in the first turn. In the second turn, 1000 of the input tokens will
-    be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
-    tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens,
-    decreasing the overall cost.
-
-    Obviously, the details will vary from conversation to conversation, but
-    if you have a large system prompt that you re-use many times you should
-    expect to see larger savings. You can see exactly how many input and
-    cache input tokens each turn uses, along with the total cost,
-    with `chat.get_tokens()`. If you don't see savings for your use case, you can
-    suppress caching with `cache="none"`.
-
-    Note: Claude will only cache longer prompts, with caching requiring at least
-    1024-4096 tokens, depending on the model. So don't be surprised if you
-    don't see any differences with caching if you have a short prompt.
-
-    See all the details at
-    <https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
+        Chat with an Anthropic Claude model.
+
+        [Anthropic](https://www.anthropic.com) provides a number of chat based
+        models under the [Claude](https://www.anthropic.com/claude) moniker.
+
+        Prerequisites
+        -------------
+
+        ::: {.callout-note}
+        ## API key
+
+        Note that a Claude Pro membership does not give you the ability to call
+        models via the API. You will need to go to the [developer
+        console](https://console.anthropic.com/account/keys) to sign up (and pay
+        for) a developer account that will give you an API key that you can use with
+        this package.
+        :::
+
+        ::: {.callout-note}
+        ## Python requirements
+
+        `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
+        :::
+
+        Examples
+        --------
+
+        ```python
+        import os
+        from chatlas import ChatAnthropic
+
+        chat = ChatAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+        chat.chat("What is the capital of France?")
+        ```
+
+        Parameters
+        ----------
+        system_prompt
+            A system prompt to set the behavior of the assistant.
+        model
+            The model to use for the chat. The default, None, will pick a reasonable
+            default, and warn you about it. We strongly recommend explicitly
+            choosing a model for all but the most casual use.
+        max_tokens
+            Maximum number of tokens to generate before stopping.
+        cache
+            How long to cache inputs? Defaults to "5m" (five minutes).
+            Set to "none" to disable caching or "1h" to cache for one hour.
+            See the Caching section for details.
+        reasoning
+            The token budget Claude may spend on reasoning: an integer (used as
+            `budget_tokens`) or a full `ThinkingConfigEnabledParam`. Must be ≥1024 and
+            less than `max_tokens`. Larger budgets can enable more thorough analysis
+            for complex problems, improving response quality. See [extended
+            thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
+            for details.
+        api_key
+            The API key to use for authentication. You generally should not supply
+            this directly, but instead set the `ANTHROPIC_API_KEY` environment
+            variable.
+        kwargs
+            Additional arguments to pass to the `anthropic.Anthropic()` client
+            constructor.
+
+        Returns
+        -------
+        Chat
+            A Chat object.
+
+        Note
+        ----
+        Pasting an API key into a chat constructor (e.g., `ChatAnthropic(api_key="...")`)
+        is the simplest way to get started, and is fine for interactive use, but is
+        problematic for code that may be shared with others.
+
+        Instead, consider using environment variables or a configuration file to manage
+        your credentials. One popular way to manage credentials is to use a `.env` file
+        to store your credentials, and then use the `python-dotenv` package to load them
+        into your environment.
+
+        ```shell
+        pip install python-dotenv
+        ```
+
+        ```shell
+        # .env
+        ANTHROPIC_API_KEY=...
+        ```
+
+        ```python
+        from chatlas import ChatAnthropic
+        from dotenv import load_dotenv
+
+        load_dotenv()
+        chat = ChatAnthropic()
+        chat.console()
+        ```
+
+        Another, more general, solution is to load your environment variables into the shell
+        before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
+
+        ```shell
+        export ANTHROPIC_API_KEY=...
+        ```
+
+        Caching
+        -------
+
+        Caching with Claude is a bit more complicated than other providers but we
+        believe that on average it will save you both money and time, so we have
+        enabled it by default. With other providers, like OpenAI and Google,
+        you only pay for cache reads, which cost 10% of the normal price. With
+        Claude, you also pay for cache writes, which cost 125% of the normal price
+        for 5 minute caching and 200% of the normal price for 1 hour caching.
+
+        How does this affect the total cost of a conversation? Imagine the first
+        turn sends 1000 input tokens and receives 200 output tokens. The second
+        turn must first send both the input and output from the previous turn
+        (1200 tokens). It then sends a further 1000 tokens and receives 200 tokens
+        back.
+
+        To compare the prices of these two approaches we can ignore the cost of
+        output tokens, because they are the same for both. How much will the input
+        tokens cost? If we don't use caching, we send 1000 tokens in the first turn
+        and 2200 (1000 + 200 + 1000) tokens in the second turn for a total of 3200
+        tokens. If we use caching, we'll send (the equivalent of) 1000 * 1.25 = 1250
+        tokens in the first turn. In the second turn, 1000 of the input tokens will
+        be cached so the total cost is 1000 * 0.1 + (200 + 1000) * 1.25 = 1600
+        tokens. That makes a total of 2850 tokens, i.e. 11% fewer tokens,
+        decreasing the overall cost.
+
+        Obviously, the details will vary from conversation to conversation, but
+        if you have a large system prompt that you re-use many times you should
+        expect to see larger savings. You can see exactly how many input and
+        cache input tokens each turn uses, along with the total cost,
+        with `chat.get_tokens()`. If you don't see savings for your use case, you can
+        suppress caching with `cache="none"`.
+
+        Note: Claude will only cache longer prompts, with caching requiring at least
+        1024-4096 tokens, depending on the model. So don't be surprised if you
+        don't see any differences with caching if you have a short prompt.
+
+        See all the details at
+        <https://docs.claude.com/en/docs/build-with-claude/prompt-caching>.
     """
 
     if model is None:
         model = log_model_default("claude-sonnet-4-0")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if isinstance(reasoning, int):
+            reasoning = {"type": "enabled", "budget_tokens": reasoning}
+        kwargs_chat = {"thinking": reasoning}
+
     return Chat(
         provider=AnthropicProvider(
             api_key=api_key,
@@ -229,6 +248,7 @@ def ChatAnthropic(
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
 
 
@@ -451,6 +471,12 @@ def stream_merge_chunks(self, completion, chunk):
                 if not isinstance(this_content.input, str):
                     this_content.input = ""  # type: ignore
                 this_content.input += json_delta  # type: ignore
+            elif chunk.delta.type == "thinking_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.thinking += chunk.delta.thinking
+            elif chunk.delta.type == "signature_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.signature += chunk.delta.signature
         elif chunk.type == "content_block_stop":
             this_content = completion.content[chunk.index]
             if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -656,6 +682,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
                 res["content"] = content.get_model_value()  # type: ignore
 
             return res
+        elif isinstance(content, ContentThinking):
+            extra = content.extra or {}
+            return {
+                "type": "thinking",
+                "thinking": content.thinking,
+                "signature": extra.get("signature", ""),
+            }
 
         raise ValueError(f"Unknown content type: {type(content)}")
 
@@ -709,6 +742,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
                             arguments=content.input,
                         )
                     )
+            elif content.type == "thinking":
+                contents.append(
+                    ContentThinking(
+                        thinking=content.thinking,
+                        extra={"signature": content.signature},
+                    )
+                )
 
         return Turn(
             "assistant",