diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f61be0e..8276d3d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,18 +12,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### New features
 
 * Added new family of functions (`parallel_chat()`, `parallel_chat_text()`, and `parallel_chat_structured()`) for submitting multiple prompts at once with some basic rate limiting toggles. (#188)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` gain access to the latest models, built-in tools, image generation, and more as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
+* `ChatOpenAI()`, `ChatAnthropic()`, and `ChatGoogle()` gain a new `reasoning` parameter to easily opt into, and fully customize, reasoning capabilities. (#202)
+  * A new `ContentThinking` content type captures the "thinking" portion of a reasoning model's response. (#192)
+* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain a new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 * Added support for systematic evaluation via [Inspect AI](https://inspect.aisi.org.uk/). This includes:
   * A new `.export_eval()` method for exporting conversation history as an Inspect eval dataset sample. This supports multi-turn conversations, tool calls, images, PDFs, and structured data.
   * A new `.to_solver()` method for translating chat instances into Inspect solvers that can be used with Inspect's evaluation framework.
   * A new `Turn.to_inspect_messages()` method for converting turns to Inspect's message format.
   * Comprehensive documentation in the [Evals guide](https://posit-dev.github.io/chatlas/misc/evals.html).
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) gain access to latest models, built-in tools, etc. as a result of moving to the new [Responses API](https://platform.openai.com/docs/api-reference/responses). (#192)
-* Added rudimentary support for a new `ContentThinking` type. (#192)
-* `ChatAnthropic()` and `ChatBedrockAnthropic()` gain new `cache` parameter to control caching. By default it is set to "5m". This should (on average) reduce the cost of your chats. (#215)
 
 ### Changes
 
-* `ChatOpenAI()` (and `ChatAzureOpenAI()`) move from OpenAI's Completions API to [Responses API](https://platform.openai.com/docs/api-reference/responses). If this happens to break behavior, change `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
+* `ChatOpenAI()` and `ChatAzureOpenAI()` move from OpenAI's Completions API to the [Responses API](https://platform.openai.com/docs/api-reference/responses). If this breaks existing behavior, switch `ChatOpenAI()` -> `ChatOpenAICompletions()` (or `ChatAzureOpenAI()` -> `ChatAzureOpenAICompletions()`). (#192)
 * The `.set_model_params()` method no longer accepts `kwargs`. Instead, use the new `chat.kwargs_chat` attribute to set chat input parameters that persist across the chat session. (#212)
 * `Provider` implementations now require an additional `.value_tokens()` method. Previously, it was assumed that token info was logged and attached to the `Turn` as part of the `.value_turn()` method. The logging and attaching is now handled automatically.
 (#194)
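As a reviewer's note, here is a minimal sketch of the `chat.kwargs_chat` workflow mentioned in the changelog above. The parameter values are illustrative, and this assumes `kwargs_chat` is a plain dict attribute on `Chat` (as the changelog entry suggests):

```python
from chatlas import ChatOpenAI

chat = ChatOpenAI(model="gpt-4.1")

# Previously: chat.set_model_params(kwargs={...})
# Now: set chat input parameters that persist across the session.
# (Illustrative values; assumes kwargs_chat is a plain dict attribute.)
chat.kwargs_chat = {"reasoning": {"effort": "low", "summary": "auto"}}
```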
diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py
index fb5eb1a6..0d70393c 100644
--- a/chatlas/_provider_anthropic.py
+++ b/chatlas/_provider_anthropic.py
@@ -17,6 +17,7 @@
     ContentJson,
     ContentPDF,
     ContentText,
+    ContentThinking,
     ContentToolRequest,
     ContentToolResult,
     ContentToolResultImage,
@@ -41,6 +42,8 @@
         MessageParam,
         RawMessageStreamEvent,
         TextBlock,
+        ThinkingBlock,
+        ThinkingBlockParam,
         ToolParam,
         ToolUseBlock,
     )
@@ -51,6 +54,7 @@
     from anthropic.types.messages.batch_create_params import Request as BatchRequest
     from anthropic.types.model_param import ModelParam
     from anthropic.types.text_block_param import TextBlockParam
+    from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
     from anthropic.types.tool_result_block_param import ToolResultBlockParam
     from anthropic.types.tool_use_block_param import ToolUseBlockParam
 
@@ -62,6 +66,7 @@
         ToolUseBlockParam,
         ToolResultBlockParam,
         DocumentBlockParam,
+        ThinkingBlockParam,
     ]
 else:
     Message = object
@@ -72,9 +77,10 @@ def ChatAnthropic(
     *,
     system_prompt: Optional[str] = None,
     model: "Optional[ModelParam]" = None,
-    api_key: Optional[str] = None,
     max_tokens: int = 4096,
     cache: Literal["5m", "1h", "none"] = "5m",
+    reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
+    api_key: Optional[str] = None,
     kwargs: Optional["ChatClientArgs"] = None,
 ) -> Chat["SubmitInputArgs", Message]:
     """
@@ -121,16 +127,23 @@ def ChatAnthropic(
         The model to use for the chat. The default, None, will pick a reasonable
         default, and warn you about it. We strongly recommend explicitly
         choosing a model for all but the most casual use.
-    api_key
-        The API key to use for authentication. You generally should not supply
-        this directly, but instead set the `ANTHROPIC_API_KEY` environment
-        variable.
     max_tokens
         Maximum number of tokens to generate before stopping.
     cache
         How long to cache inputs? Defaults to "5m" (five minutes). Set to
         "none" to disable caching or "1h" to cache for one hour. See the
         Caching section for details.
+    reasoning
+        Determines how many tokens Claude can allocate to reasoning. Must be
+        ≥1024 and less than `max_tokens`. Larger budgets can enable more
+        thorough analysis for complex problems, improving response quality. See
+        [extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
+        for details.
+    api_key
+        The API key to use for authentication. You generally should not supply
+        this directly, but instead set the `ANTHROPIC_API_KEY` environment
+        variable.
     kwargs
         Additional arguments to pass to the `anthropic.Anthropic()` client
        constructor.
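A usage sketch of the new `reasoning` parameter documented above. The prompt and budget are illustrative; an `int` is normalized to `{"type": "enabled", "budget_tokens": ...}` further down in this diff:

```python
from chatlas import ChatAnthropic

# Enable extended thinking with a 2048-token budget. Per the docstring
# above, the budget must be >= 1024 and less than max_tokens.
chat = ChatAnthropic(
    model="claude-sonnet-4-0",
    max_tokens=4096,
    reasoning=2048,  # or {"type": "enabled", "budget_tokens": 2048}
)
chat.chat("How many 'r' characters are in 'strawberry'?")  # illustrative prompt
```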
@@ -220,6 +233,12 @@ def ChatAnthropic(
     if model is None:
         model = log_model_default("claude-sonnet-4-0")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if isinstance(reasoning, int):
+            reasoning = {"type": "enabled", "budget_tokens": reasoning}
+        kwargs_chat = {"thinking": reasoning}
+
     return Chat(
         provider=AnthropicProvider(
             api_key=api_key,
@@ -229,6 +248,7 @@ def ChatAnthropic(
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
 
@@ -425,8 +445,11 @@ def _structured_tool_call(**kwargs: Any):
         return kwargs_full
 
     def stream_text(self, chunk) -> Optional[str]:
-        if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
-            return chunk.delta.text
+        if chunk.type == "content_block_delta":
+            if chunk.delta.type == "text_delta":
+                return chunk.delta.text
+            if chunk.delta.type == "thinking_delta":
+                return chunk.delta.thinking
         return None
 
     def stream_merge_chunks(self, completion, chunk):
@@ -451,6 +474,12 @@ def stream_merge_chunks(self, completion, chunk):
                 if not isinstance(this_content.input, str):
                     this_content.input = ""  # type: ignore
                 this_content.input += json_delta  # type: ignore
+            elif chunk.delta.type == "thinking_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.thinking += chunk.delta.thinking
+            elif chunk.delta.type == "signature_delta":
+                this_content = cast("ThinkingBlock", this_content)
+                this_content.signature += chunk.delta.signature
         elif chunk.type == "content_block_stop":
             this_content = completion.content[chunk.index]
             if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -656,6 +685,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
             res["content"] = content.get_model_value()  # type: ignore
         return res
+    elif isinstance(content, ContentThinking):
+        extra = content.extra or {}
+        return {
+            "type": "thinking",
+            "thinking": content.thinking,
+            "signature": extra.get("signature", ""),
+        }
 
     raise ValueError(f"Unknown content type: {type(content)}")
 
@@ -709,6 +745,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
                     arguments=content.input,
                 )
             )
+        elif content.type == "thinking":
+            contents.append(
+                ContentThinking(
+                    thinking=content.thinking,
+                    extra={"signature": content.signature},
+                )
+            )
 
         return Turn(
             "assistant",
diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py
index eea999ee..ac0936d9 100644
--- a/chatlas/_provider_google.py
+++ b/chatlas/_provider_google.py
@@ -34,6 +34,7 @@
         GenerateContentResponseDict,
         Part,
         PartDict,
+        ThinkingConfigDict,
     )
 
     from .types.google import ChatClientArgs, SubmitInputArgs
@@ -45,6 +46,7 @@ def ChatGoogle(
     *,
     system_prompt: Optional[str] = None,
     model: Optional[str] = None,
+    reasoning: Optional["int | ThinkingConfigDict"] = None,
     api_key: Optional[str] = None,
     kwargs: Optional["ChatClientArgs"] = None,
 ) -> Chat["SubmitInputArgs", GenerateContentResponse]:
     """
@@ -86,6 +88,10 @@ def ChatGoogle(
         The model to use for the chat. The default, None, will pick a reasonable
         default, and warn you about it. We strongly recommend explicitly
         choosing a model for all but the most casual use.
+    reasoning
+        If provided, enables reasoning (a.k.a. "thoughts") in the model's
+        responses. This can be an integer number of tokens to use for reasoning,
+        or a full `ThinkingConfigDict` to customize the reasoning behavior.
     api_key
         The API key to use for authentication. You generally should not supply
         this directly, but instead set the `GOOGLE_API_KEY` environment variable.
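Likewise for `ChatGoogle()`, a hedged sketch of the parameter just documented. The prompt is illustrative; the `int` form is expanded to a `ThinkingConfigDict` in the next hunk:

```python
from chatlas import ChatGoogle

# An int budget becomes {"thinking_budget": 1024, "include_thoughts": True};
# pass a full ThinkingConfigDict instead for finer-grained control.
chat = ChatGoogle(
    model="gemini-2.5-flash",
    reasoning=1024,
)
chat.chat("Plan a 3-stop walking route that minimizes backtracking.")  # illustrative
```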
@@ -137,14 +143,20 @@ def ChatGoogle(
     if model is None:
         model = log_model_default("gemini-2.5-flash")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if isinstance(reasoning, int):
+            reasoning = {"thinking_budget": reasoning, "include_thoughts": True}
+        kwargs_chat["config"] = {"thinking_config": reasoning}
+
     return Chat(
         provider=GoogleProvider(
             model=model,
             api_key=api_key,
-            name="Google/Gemini",
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
 
@@ -367,7 +379,7 @@ def value_tokens(self, completion):
         cached = usage.cached_content_token_count or 0
         return (
             (usage.prompt_token_count or 0) - cached,
-            usage.candidates_token_count or 0,
+            (usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0),
             usage.cached_content_token_count or 0,
         )
 
diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py
index 255aa6bd..2e84e980 100644
--- a/chatlas/_provider_openai.py
+++ b/chatlas/_provider_openai.py
@@ -35,6 +35,8 @@
     )
     from openai.types.responses.easy_input_message_param import EasyInputMessageParam
     from openai.types.responses.tool_param import ToolParam
+    from openai.types.shared.reasoning_effort import ReasoningEffort
+    from openai.types.shared_params.reasoning import Reasoning
     from openai.types.shared_params.responses_model import ResponsesModel
 
     from .types.openai import ChatClientArgs
@@ -47,8 +49,9 @@ def ChatOpenAI(
     *,
     system_prompt: Optional[str] = None,
     model: "Optional[ResponsesModel | str]" = None,
-    api_key: Optional[str] = None,
     base_url: str = "https://api.openai.com/v1",
+    reasoning: "Optional[ReasoningEffort | Reasoning]" = None,
+    api_key: Optional[str] = None,
     kwargs: Optional["ChatClientArgs"] = None,
 ) -> Chat["SubmitInputArgs", Response]:
     """
@@ -87,12 +90,15 @@ def ChatOpenAI(
         The model to use for the chat. The default, None, will pick a reasonable
         default, and warn you about it. We strongly recommend explicitly
         choosing a model for all but the most casual use.
+    base_url
+        The base URL to the endpoint; the default uses OpenAI.
+    reasoning
+        The reasoning effort to use (for reasoning-capable models such as the
+        o-series and gpt-5 series).
     api_key
         The API key to use for authentication. You generally should not supply
         this directly, but instead set the `OPENAI_API_KEY` environment variable.
-    base_url
-        The base URL to the endpoint; the default uses OpenAI.
     kwargs
         Additional arguments to pass to the `openai.OpenAI()` client
         constructor.
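And for `ChatOpenAI()`, an illustrative sketch: a bare effort string is expanded to a `Reasoning` dict in the hunk below, and non-reasoning models trigger a warning:

```python
from chatlas import ChatOpenAI

# "medium" is normalized to {"effort": "medium", "summary": "auto"}.
# Passing a non-reasoning model (e.g. "gpt-4.1") raises a UserWarning instead.
chat = ChatOpenAI(
    model="gpt-5",
    reasoning="medium",
)
chat.chat("Prove that the square root of 2 is irrational.")  # illustrative prompt
```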
@@ -146,6 +152,14 @@ def ChatOpenAI(
     if model is None:
         model = log_model_default("gpt-4.1")
 
+    kwargs_chat: "SubmitInputArgs" = {}
+    if reasoning is not None:
+        if not is_reasoning_model(model):
+            warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
+        if isinstance(reasoning, str):
+            reasoning = {"effort": reasoning, "summary": "auto"}
+        kwargs_chat = {"reasoning": reasoning}
+
     return Chat(
         provider=OpenAIProvider(
             api_key=api_key,
@@ -154,6 +168,7 @@ def ChatOpenAI(
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
+        kwargs_chat=kwargs_chat,
     )
 
@@ -239,7 +254,7 @@ def _chat_perform_args(
         # Request reasoning content for reasoning models
         include = []
-        if self._is_reasoning(self.model):
+        if is_reasoning_model(self.model):
             include.append("reasoning.encrypted_content")
 
         if "log_probs" in kwargs_full:
@@ -254,7 +269,14 @@ def _chat_perform_args(
     def stream_text(self, chunk):
         if chunk.type == "response.output_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
             return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.delta":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
+            return chunk.delta
+        if chunk.type == "response.reasoning_summary_text.done":
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
+            return "\n\n"
         return None
 
     def stream_merge_chunks(self, completion, chunk):
@@ -337,11 +359,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> Turn:
             completion=completion,
         )
 
-    @staticmethod
-    def _is_reasoning(model: str) -> bool:
-        # https://platform.openai.com/docs/models/compare
-        return model.startswith("o") or model.startswith("gpt-5")
-
     @staticmethod
     def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
         res: "list[ResponseInputItemParam]" = []
@@ -456,3 +473,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
 
 def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
     return {"role": role, "content": [x]}
+
+
+def is_reasoning_model(model: str) -> bool:
+    # https://platform.openai.com/docs/models/compare
+    return model.startswith("o") or model.startswith("gpt-5")
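The module-level `is_reasoning_model()` helper replaces the former `_is_reasoning()` static method. A quick sketch of the prefix heuristic; note the helper lives in a private module, so the direct import here is for illustration only:

```python
from chatlas._provider_openai import is_reasoning_model

assert is_reasoning_model("o3-mini")      # "o" prefix
assert is_reasoning_model("gpt-5")        # "gpt-5" prefix
assert not is_reasoning_model("gpt-4.1")  # ChatOpenAI(reasoning=...) warns here
```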