Skip to content

Commit 1611a60

Browse files
committed
Fix DeepSeek reasoning content handling for LiteLLM
1 parent 9fcc68f commit 1611a60

File tree

3 files changed

+101
-2
lines changed

3 files changed

+101
-2
lines changed

src/agents/extensions/models/litellm_model.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,9 @@ async def _fetch_response(
280280
)
281281

282282
converted_messages = Converter.items_to_messages(
283-
input, preserve_thinking_blocks=preserve_thinking_blocks
283+
input,
284+
preserve_thinking_blocks=preserve_thinking_blocks,
285+
include_reasoning_content=self._should_include_reasoning_content(model_settings),
284286
)
285287

286288
# Fix for interleaved thinking bug: reorder messages to ensure tool_use comes before tool_result # noqa: E501
@@ -436,6 +438,25 @@ async def _fetch_response(
436438
)
437439
return response, ret
438440

441+
def _should_include_reasoning_content(self, model_settings: ModelSettings) -> bool:
442+
"""Determine whether to forward reasoning_content on assistant messages.
443+
444+
DeepSeek thinking mode requires reasoning_content to be present on messages with tool
445+
calls, otherwise the API returns a 400.
446+
"""
447+
model_name = str(self.model).lower()
448+
base_url = (self.base_url or "").lower()
449+
450+
if "deepseek" in model_name or "deepseek.com" in base_url:
451+
return True
452+
453+
if isinstance(model_settings.extra_body, dict) and "thinking" in model_settings.extra_body:
454+
return True
455+
if model_settings.extra_args and "thinking" in model_settings.extra_args:
456+
return True
457+
458+
return False
459+
439460
def _fix_tool_message_ordering(
440461
self, messages: list[ChatCompletionMessageParam]
441462
) -> list[ChatCompletionMessageParam]:

src/agents/models/chatcmpl_converter.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ def items_to_messages(
340340
cls,
341341
items: str | Iterable[TResponseInputItem],
342342
preserve_thinking_blocks: bool = False,
343+
include_reasoning_content: bool = False,
343344
) -> list[ChatCompletionMessageParam]:
344345
"""
345346
Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
@@ -372,6 +373,21 @@ def items_to_messages(
372373
result: list[ChatCompletionMessageParam] = []
373374
current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
374375
pending_thinking_blocks: list[dict[str, str]] | None = None
376+
pending_reasoning_content: str | None = None
377+
378+
def apply_pending_reasoning_content(
379+
message: ChatCompletionAssistantMessageParam,
380+
) -> None:
381+
nonlocal pending_reasoning_content
382+
if (
383+
not include_reasoning_content
384+
or pending_reasoning_content is None
385+
or "reasoning_content" in message
386+
):
387+
return
388+
389+
cast(dict[str, Any], message)["reasoning_content"] = pending_reasoning_content
390+
pending_reasoning_content = None
375391

376392
def flush_assistant_message() -> None:
377393
nonlocal current_assistant_msg
@@ -387,6 +403,9 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
387403
if current_assistant_msg is None:
388404
current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
389405
current_assistant_msg["tool_calls"] = []
406+
apply_pending_reasoning_content(current_assistant_msg)
407+
else:
408+
apply_pending_reasoning_content(current_assistant_msg)
390409

391410
return current_assistant_msg
392411

@@ -479,6 +498,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
479498
new_asst["content"] = combined
480499

481500
new_asst["tool_calls"] = []
501+
apply_pending_reasoning_content(new_asst)
482502
current_assistant_msg = new_asst
483503

484504
# 4) function/file-search calls => attach to assistant
@@ -556,6 +576,32 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
556576

557577
# 7) reasoning message => extract thinking blocks if present
558578
elif reasoning_item := cls.maybe_reasoning_message(item):
579+
# Capture reasoning content if present so we can attach it to the next assistant
580+
# message (required by some providers for tool calls).
581+
summary_items = reasoning_item.get("summary")
582+
if (
583+
include_reasoning_content
584+
and isinstance(summary_items, list)
585+
and len(summary_items) > 0
586+
):
587+
reasoning_text = summary_items[0].get("text")
588+
if reasoning_text is not None:
589+
pending_reasoning_content = reasoning_text
590+
if (
591+
include_reasoning_content
592+
and pending_reasoning_content is None
593+
and isinstance(reasoning_item.get("content"), list)
594+
):
595+
reasoning_texts = [
596+
content_item.get("text")
597+
for content_item in cast(list[dict[str, Any]], reasoning_item["content"])
598+
if isinstance(content_item, dict)
599+
and content_item.get("type") == "reasoning_text"
600+
and content_item.get("text") is not None
601+
]
602+
if reasoning_texts:
603+
pending_reasoning_content = "".join(cast(list[str], reasoning_texts))
604+
559605
# Reconstruct thinking blocks from content (text) and encrypted_content (signature)
560606
content_items = reasoning_item.get("content", [])
561607
encrypted_content = reasoning_item.get("encrypted_content")

tests/test_anthropic_thinking_blocks.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from openai.types.chat.chat_completion_message_tool_call import Function
1717

1818
from agents.extensions.models.litellm_model import InternalChatCompletionMessage
19+
from agents.items import TResponseInputItem
1920
from agents.models.chatcmpl_converter import Converter
2021

2122

@@ -58,7 +59,7 @@ def test_converter_skips_reasoning_items():
5859
]
5960

6061
# Convert to messages
61-
messages = Converter.items_to_messages(test_items) # type: ignore[arg-type]
62+
messages = Converter.items_to_messages(cast(list[TResponseInputItem], test_items))
6263

6364
# Should have user message and assistant message, but no reasoning content
6465
assert len(messages) == 2
@@ -242,3 +243,34 @@ def test_anthropic_thinking_blocks_with_tool_calls():
242243
tool_calls = assistant_msg.get("tool_calls", [])
243244
assert len(cast(list[Any], tool_calls)) == 1, "Tool calls should be preserved"
244245
assert cast(list[Any], tool_calls)[0]["function"]["name"] == "get_weather"
246+
247+
248+
def test_reasoning_content_added_when_enabled():
    """
    Verify reasoning content is attached to the assistant tool-call message when requested.
    """
    reasoning_item = {
        "id": "reasoning_123",
        "type": "reasoning",
        "summary": [{"text": "Thinking about the weather", "type": "summary_text"}],
    }
    tool_call_item = {
        "id": "call_123",
        "type": "function_call",
        "name": "get_weather",
        "arguments": '{"city": "Tokyo"}',
        "call_id": "call_123",
    }
    items: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        reasoning_item,
        tool_call_item,
    ]

    messages = Converter.items_to_messages(
        cast(list[TResponseInputItem], items),
        include_reasoning_content=True,
    )

    # The reasoning item's summary text should land on the assistant message that
    # carries the tool call, and the tool call itself must survive conversion.
    assistant = next(msg for msg in messages if msg.get("role") == "assistant")
    assert assistant.get("reasoning_content") == "Thinking about the weather"
    calls = assistant.get("tool_calls")
    assert calls and len(cast(list[Any], calls)) == 1

0 commit comments

Comments (0)