Commit f6c770a
Merge branch 'main' into fix/deepseek-litellm-reasoning-content
2 parents e280092 + 71fa12c commit f6c770a

File tree

12 files changed (+328, -24 lines)

examples/basic/stream_items.py

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ async def main():
             continue
         elif event.type == "run_item_stream_event":
             if event.item.type == "tool_call_item":
-                print("-- Tool was called")
+                print(f"-- Tool was called: {getattr(event.item.raw_item, 'name', 'Unknown Tool')}")
             elif event.item.type == "tool_call_output_item":
                 print(f"-- Tool output: {event.item.output}")
             elif event.item.type == "message_output_item":
@@ -47,7 +47,7 @@ async def main():

     # === Run starting ===
     # Agent updated: Joker
-    # -- Tool was called
+    # -- Tool was called: how_many_jokes
     # -- Tool output: 4
     # -- Message output:
     # Sure, here are four jokes for you:
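Note on the change above: `event.item.raw_item` is only guaranteed to carry a `name` for function-style tool calls, so the example hedges with `getattr` instead of direct attribute access. A minimal standalone sketch of the same pattern (the two payload classes are hypothetical stand-ins, not SDK types):

from dataclasses import dataclass

@dataclass
class FunctionCall:   # payload that carries a tool name
    name: str

@dataclass
class OpaqueCall:     # payload without a name field (e.g. a built-in tool call)
    call_id: str

for raw_item in (FunctionCall(name="how_many_jokes"), OpaqueCall(call_id="call_1")):
    # Same fallback as the diff: never raises, prints a placeholder instead.
    print(f"-- Tool was called: {getattr(raw_item, 'name', 'Unknown Tool')}")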

examples/realtime/cli/demo.py

Lines changed: 23 additions & 9 deletions
@@ -25,6 +25,7 @@
 ENERGY_THRESHOLD = 0.015  # RMS threshold for barge‑in while assistant is speaking
 PREBUFFER_CHUNKS = 3  # initial jitter buffer (~120ms with 40ms chunks)
 FADE_OUT_MS = 12  # short fade to avoid clicks when interrupting
+PLAYBACK_ECHO_MARGIN = 0.002  # extra energy above playback echo required to count as speech

 # Set up logging for OpenAI agents SDK
 # logging.basicConfig(
@@ -78,6 +79,7 @@ def __init__(self) -> None:
         self.fade_total_samples = 0
         self.fade_done_samples = 0
         self.fade_samples = int(SAMPLE_RATE * (FADE_OUT_MS / 1000.0))
+        self.playback_rms = 0.0  # smoothed playback energy to filter out echo

     def _output_callback(self, outdata, frames: int, time, status) -> None:
         """Callback for audio output - handles continuous audio stream from server."""
@@ -123,6 +125,7 @@ def _output_callback(self, outdata, frames: int, time, status) -> None:
                 gain = 1.0 - (idx / float(self.fade_total_samples))
                 ramped = np.clip(src * gain, -32768.0, 32767.0).astype(np.int16)
                 outdata[samples_filled : samples_filled + n, 0] = ramped
+                self._update_playback_rms(ramped)

                 # Optionally report played bytes (ramped) to playback tracker
                 try:
@@ -183,6 +186,7 @@ def _output_callback(self, outdata, frames: int, time, status) -> None:
             chunk_data = samples[self.chunk_position : self.chunk_position + samples_to_copy]
             # More efficient: direct assignment for mono audio instead of reshape
             outdata[samples_filled : samples_filled + samples_to_copy, 0] = chunk_data
+            self._update_playback_rms(chunk_data)
             samples_filled += samples_to_copy
             self.chunk_position += samples_to_copy

@@ -273,14 +277,6 @@ async def capture_audio(self) -> None:
         read_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)

         try:
-            # Simple energy-based barge-in: if user speaks while audio is playing, interrupt.
-            def rms_energy(samples: np.ndarray[Any, np.dtype[Any]]) -> float:
-                if samples.size == 0:
-                    return 0.0
-                # Normalize int16 to [-1, 1]
-                x = samples.astype(np.float32) / 32768.0
-                return float(np.sqrt(np.mean(x * x)))
-
             while self.recording:
                 # Check if there's enough data to read
                 if self.audio_stream.read_available < read_size:
@@ -300,7 +296,13 @@ def rms_energy(samples: np.ndarray[Any, np.dtype[Any]]) -> float:
                 if assistant_playing:
                     # Compute RMS energy to detect speech while assistant is talking
                     samples = data.reshape(-1)
-                    if rms_energy(samples) >= ENERGY_THRESHOLD:
+                    mic_rms = self._compute_rms(samples)
+                    # Require the mic to be louder than the echo of the assistant playback.
+                    playback_gate = max(
+                        ENERGY_THRESHOLD,
+                        self.playback_rms * 0.6 + PLAYBACK_ECHO_MARGIN,
+                    )
+                    if mic_rms >= playback_gate:
                         # Locally flush queued assistant audio for snappier interruption.
                         self.interrupt_event.set()
                         await self.session.send_audio(audio_bytes)
@@ -356,6 +358,18 @@ async def _on_event(self, event: RealtimeSessionEvent) -> None:
         except Exception as e:
             print(f"Error processing event: {_truncate_str(str(e), 200)}")

+    def _compute_rms(self, samples: np.ndarray[Any, np.dtype[Any]]) -> float:
+        """Compute RMS energy for int16 samples normalized to [-1, 1]."""
+        if samples.size == 0:
+            return 0.0
+        x = samples.astype(np.float32) / 32768.0
+        return float(np.sqrt(np.mean(x * x)))
+
+    def _update_playback_rms(self, samples: np.ndarray[Any, np.dtype[Any]]) -> None:
+        """Keep a smoothed estimate of playback energy to filter out echo feedback."""
+        sample_rms = self._compute_rms(samples)
+        self.playback_rms = 0.9 * self.playback_rms + 0.1 * sample_rms
+

 if __name__ == "__main__":
     demo = NoUIDemo()
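The barge-in change replaces the fixed energy threshold with a gate derived from a smoothed estimate of the assistant's own playback energy, so speaker echo no longer triggers interruptions. A self-contained sketch of the gate math (constants and the 0.9/0.1 EMA copied from the diff; the synthetic tone and the 0.3/1.5 mic gains are illustrative assumptions):

import numpy as np

ENERGY_THRESHOLD = 0.015
PLAYBACK_ECHO_MARGIN = 0.002

def rms(x: np.ndarray) -> float:
    if x.size == 0:
        return 0.0
    v = x.astype(np.float32) / 32768.0
    return float(np.sqrt(np.mean(v * v)))

# Assistant playback: a steady tone fed through the same EMA as the demo.
tone = (6000 * np.sin(np.linspace(0, 40 * np.pi, 1920))).astype(np.int16)
playback_rms = 0.0
for _ in range(30):  # let the smoothed estimate warm up
    playback_rms = 0.9 * playback_rms + 0.1 * rms(tone)

gate = max(ENERGY_THRESHOLD, playback_rms * 0.6 + PLAYBACK_ECHO_MARGIN)
echo = (tone * 0.3).astype(np.int16)    # mic hears only speaker bleed
voice = (tone * 1.5).astype(np.int16)   # user genuinely talking over playback
print(f"gate={gate:.3f}  echo={rms(echo):.3f}  barge-in: {rms(echo) >= gate}")     # False
print(f"gate={gate:.3f}  voice={rms(voice):.3f}  barge-in: {rms(voice) >= gate}")  # True

One caveat of the 0.9/0.1 smoothing: the estimate takes a couple dozen chunks to warm up, so very early playback echo can still slip past the gate until `playback_rms` catches up.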

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,13 +1,13 @@
 [project]
 name = "openai-agents"
-version = "0.6.2"
+version = "0.6.3"
 description = "OpenAI Agents SDK"
 readme = "README.md"
 requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "support@openai.com" }]
 dependencies = [
-    "openai>=2.8.0,<3",
+    "openai>=2.9.0,<3",
     "pydantic>=2.12.3, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",

src/agents/_run_impl.py

Lines changed: 13 additions & 3 deletions
@@ -1826,7 +1826,10 @@ async def execute(
         output_text = ""

         try:
-            operation = _coerce_apply_patch_operation(call.tool_call)
+            operation = _coerce_apply_patch_operation(
+                call.tool_call,
+                context_wrapper=context_wrapper,
+            )
             editor = apply_patch_tool.editor
             if operation.type == "create_file":
                 result = editor.create_file(operation)
@@ -2093,7 +2096,9 @@ def _extract_apply_patch_call_id(tool_call: Any) -> str:
     return str(value)


-def _coerce_apply_patch_operation(tool_call: Any) -> ApplyPatchOperation:
+def _coerce_apply_patch_operation(
+    tool_call: Any, *, context_wrapper: RunContextWrapper[Any]
+) -> ApplyPatchOperation:
     raw_operation = _get_mapping_or_attr(tool_call, "operation")
     if raw_operation is None:
         raise ModelBehaviorError("Apply patch call is missing an operation payload.")
@@ -2117,7 +2122,12 @@ def _coerce_apply_patch_operation(tool_call: Any) -> ApplyPatchOperation:
     else:
         diff = None

-    return ApplyPatchOperation(type=op_type_literal, path=str(path), diff=diff)
+    return ApplyPatchOperation(
+        type=op_type_literal,
+        path=str(path),
+        diff=diff,
+        ctx_wrapper=context_wrapper,
+    )


 def _normalize_apply_patch_result(

src/agents/editor.py

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 from dataclasses import dataclass
 from typing import Literal, Protocol, runtime_checkable

+from .run_context import RunContextWrapper
 from .util._types import MaybeAwaitable

 ApplyPatchOperationType = Literal["create_file", "update_file", "delete_file"]
@@ -18,6 +19,7 @@ class ApplyPatchOperation:
     type: ApplyPatchOperationType
     path: str
     diff: str | None = None
+    ctx_wrapper: RunContextWrapper | None = None


 @dataclass(**_DATACLASS_KWARGS)
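With `ctx_wrapper` on the operation, a custom editor can consult the active run context when applying patches. A hedged sketch of what this enables (the `DeployContext` type and its dry-run policy are invented for illustration; only `ApplyPatchOperation.ctx_wrapper` comes from this diff):

from dataclasses import dataclass

from agents.editor import ApplyPatchOperation

@dataclass
class DeployContext:  # hypothetical user-defined run context
    dry_run: bool

class AuditingEditor:
    """Editor sketch that consults the run context carried by the operation."""

    def create_file(self, operation: ApplyPatchOperation) -> str:
        ctx = operation.ctx_wrapper.context if operation.ctx_wrapper else None
        if isinstance(ctx, DeployContext) and ctx.dry_run:
            return f"[dry-run] would create {operation.path}"
        # ... write operation.diff to operation.path here ...
        return f"Created {operation.path}"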

src/agents/models/chatcmpl_helpers.py

Lines changed: 57 additions & 0 deletions
@@ -3,6 +3,12 @@
 from contextvars import ContextVar

 from openai import AsyncOpenAI
+from openai.types.chat.chat_completion_token_logprob import ChatCompletionTokenLogprob
+from openai.types.responses.response_output_text import Logprob, LogprobTopLogprob
+from openai.types.responses.response_text_delta_event import (
+    Logprob as DeltaLogprob,
+    LogprobTopLogprob as DeltaTopLogprob,
+)

 from ..model_settings import ModelSettings
 from ..version import __version__
@@ -41,3 +47,54 @@ def get_stream_options_param(
         )
         stream_options = {"include_usage": include_usage} if include_usage is not None else None
         return stream_options
+
+    @classmethod
+    def convert_logprobs_for_output_text(
+        cls, logprobs: list[ChatCompletionTokenLogprob] | None
+    ) -> list[Logprob] | None:
+        if not logprobs:
+            return None
+
+        converted: list[Logprob] = []
+        for token_logprob in logprobs:
+            converted.append(
+                Logprob(
+                    token=token_logprob.token,
+                    logprob=token_logprob.logprob,
+                    bytes=token_logprob.bytes or [],
+                    top_logprobs=[
+                        LogprobTopLogprob(
+                            token=top_logprob.token,
+                            logprob=top_logprob.logprob,
+                            bytes=top_logprob.bytes or [],
+                        )
+                        for top_logprob in token_logprob.top_logprobs
+                    ],
+                )
+            )
+        return converted
+
+    @classmethod
+    def convert_logprobs_for_text_delta(
+        cls, logprobs: list[ChatCompletionTokenLogprob] | None
+    ) -> list[DeltaLogprob] | None:
+        if not logprobs:
+            return None
+
+        converted: list[DeltaLogprob] = []
+        for token_logprob in logprobs:
+            converted.append(
+                DeltaLogprob(
+                    token=token_logprob.token,
+                    logprob=token_logprob.logprob,
+                    top_logprobs=[
+                        DeltaTopLogprob(
+                            token=top_logprob.token,
+                            logprob=top_logprob.logprob,
+                        )
+                        for top_logprob in token_logprob.top_logprobs
+                    ]
+                    or None,
+                )
+            )
+        return converted
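A rough usage sketch for the two converters, using hand-built Chat Completions logprob objects (token and logprob values are illustrative):

from openai.types.chat.chat_completion_token_logprob import (
    ChatCompletionTokenLogprob,
    TopLogprob,
)

from agents.models.chatcmpl_helpers import ChatCmplHelpers

chat_logprobs = [
    ChatCompletionTokenLogprob(
        token="Hello",
        logprob=-0.12,
        bytes=[72, 101, 108, 108, 111],
        top_logprobs=[TopLogprob(token="Hello", logprob=-0.12, bytes=None)],
    )
]

# Responses-style output_text logprobs keep byte offsets (None coerced to []).
print(ChatCmplHelpers.convert_logprobs_for_output_text(chat_logprobs))
# Delta-event logprobs are built without `bytes`, matching the event type above.
print(ChatCmplHelpers.convert_logprobs_for_text_delta(chat_logprobs))
# Empty/None input passes through as None rather than [].
print(ChatCmplHelpers.convert_logprobs_for_output_text(None))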

src/agents/models/chatcmpl_stream_handler.py

Lines changed: 17 additions & 1 deletion
@@ -42,6 +42,7 @@
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

 from ..items import TResponseStreamEvent
+from .chatcmpl_helpers import ChatCmplHelpers
 from .fake_id import FAKE_RESPONSES_ID


@@ -105,6 +106,7 @@ async def handle_stream(
             continue

         delta = chunk.choices[0].delta
+        choice_logprobs = chunk.choices[0].logprobs

         # Handle thinking blocks from Anthropic (for preserving signatures)
         if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
@@ -266,6 +268,15 @@ async def handle_stream(
                     type="response.content_part.added",
                     sequence_number=sequence_number.get_and_increment(),
                 )
+            delta_logprobs = (
+                ChatCmplHelpers.convert_logprobs_for_text_delta(
+                    choice_logprobs.content if choice_logprobs else None
+                )
+                or []
+            )
+            output_logprobs = ChatCmplHelpers.convert_logprobs_for_output_text(
+                choice_logprobs.content if choice_logprobs else None
+            )
             # Emit the delta for this segment of content
             yield ResponseTextDeltaEvent(
                 content_index=state.text_content_index_and_output[0],
@@ -275,10 +286,15 @@ async def handle_stream(
                 is not None,  # fixed 0 -> 0 or 1
                 type="response.output_text.delta",
                 sequence_number=sequence_number.get_and_increment(),
-                logprobs=[],
+                logprobs=delta_logprobs,
             )
             # Accumulate the text into the response part
             state.text_content_index_and_output[1].text += delta.content
+            if output_logprobs:
+                existing_logprobs = state.text_content_index_and_output[1].logprobs or []
+                state.text_content_index_and_output[1].logprobs = (
+                    existing_logprobs + output_logprobs
+                )

             # Handle refusals (model declines to answer)
             # This is always set by the OpenAI API, but not by others e.g. LiteLLM
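With this change, `response.output_text.delta` events from Chat Completions models can carry per-token logprobs instead of an empty list. A hedged consumer-side sketch, assuming the underlying request had logprobs enabled and using the public streaming API:

import math

from agents import Agent, Runner

async def sequence_confidence(agent: Agent, prompt: str) -> float:
    """Sum token logprobs across text deltas; exp() of the sum approximates
    the probability the model assigned to the whole completion."""
    result = Runner.run_streamed(agent, input=prompt)
    total = 0.0
    async for event in result.stream_events():
        if event.type == "raw_response_event" and event.data.type == "response.output_text.delta":
            total += sum(lp.logprob for lp in event.data.logprobs or [])
    return math.exp(total)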

src/agents/models/openai_chatcompletions.py

Lines changed: 28 additions & 1 deletion
@@ -9,7 +9,13 @@
 from openai.types import ChatModel
 from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage
 from openai.types.chat.chat_completion import Choice
-from openai.types.responses import Response
+from openai.types.responses import (
+    Response,
+    ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseOutputText,
+)
+from openai.types.responses.response_output_text import Logprob
 from openai.types.responses.response_prompt_param import ResponsePromptParam

 from .. import _debug
@@ -119,12 +125,33 @@ async def get_response(

         items = Converter.message_to_output_items(message) if message is not None else []

+        logprob_models = None
+        if first_choice and first_choice.logprobs and first_choice.logprobs.content:
+            logprob_models = ChatCmplHelpers.convert_logprobs_for_output_text(
+                first_choice.logprobs.content
+            )
+
+        if logprob_models:
+            self._attach_logprobs_to_output(items, logprob_models)
+
         return ModelResponse(
             output=items,
             usage=usage,
             response_id=None,
         )

+    def _attach_logprobs_to_output(
+        self, output_items: list[ResponseOutputItem], logprobs: list[Logprob]
+    ) -> None:
+        for output_item in output_items:
+            if not isinstance(output_item, ResponseOutputMessage):
+                continue
+
+            for content in output_item.content:
+                if isinstance(content, ResponseOutputText):
+                    content.logprobs = logprobs
+                    return
+
     async def stream_response(
         self,
         system_instructions: str | None,
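On the non-streaming path, the converted logprobs are attached to the first text content of the first output message, mirroring `_attach_logprobs_to_output`. A sketch of reading them back (assumes `model_response` came from `get_response(...)` on this model with logprobs requested):

from openai.types.responses import ResponseOutputMessage, ResponseOutputText

for item in model_response.output:
    if isinstance(item, ResponseOutputMessage):
        for content in item.content:
            if isinstance(content, ResponseOutputText) and content.logprobs:
                for lp in content.logprobs:
                    print(f"{lp.token!r}: logprob {lp.logprob:.3f}")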

tests/test_apply_patch_tool.py

Lines changed: 2 additions & 0 deletions
@@ -63,6 +63,7 @@ async def test_apply_patch_tool_success() -> None:
     assert raw_item["status"] == "completed"
     assert raw_item["call_id"] == "call_apply"
     assert editor.operations[0].type == "update_file"
+    assert editor.operations[0].ctx_wrapper is context_wrapper
     assert isinstance(raw_item["output"], str)
     assert raw_item["output"].startswith("Updated tasks.md")
     input_payload = result.to_input_item()
@@ -137,3 +138,4 @@ async def test_apply_patch_tool_accepts_mapping_call() -> None:
     raw_item = cast(dict[str, Any], result.raw_item)
     assert raw_item["call_id"] == "call_mapping"
     assert editor.operations[0].path == "notes.md"
+    assert editor.operations[0].ctx_wrapper is context_wrapper
