Commit f6c770a
Merge branch 'main' into fix/deepseek-litellm-reasoning-content
2 parents e280092 + 71fa12c commit f6c770a

File tree

12 files changed (+328, -24 lines)

examples/basic/stream_items.py

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ async def main():
             continue
         elif event.type == "run_item_stream_event":
             if event.item.type == "tool_call_item":
-                print("-- Tool was called")
+                print(f"-- Tool was called: {getattr(event.item.raw_item, 'name', 'Unknown Tool')}")
             elif event.item.type == "tool_call_output_item":
                 print(f"-- Tool output: {event.item.output}")
             elif event.item.type == "message_output_item":
@@ -47,7 +47,7 @@ async def main():

     # === Run starting ===
     # Agent updated: Joker
-    # -- Tool was called
+    # -- Tool was called: how_many_jokes
     # -- Tool output: 4
     # -- Message output:
     # Sure, here are four jokes for you:
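Note on the change above: `event.item.raw_item` is only guaranteed to carry a `name` for function-style tool calls, so the example hedges with `getattr` instead of direct attribute access. A minimal standalone sketch of the same pattern (the two payload classes are hypothetical stand-ins, not SDK types):

from dataclasses import dataclass

@dataclass
class FunctionCall:   # payload that carries a tool name
    name: str

@dataclass
class OpaqueCall:     # payload without a name field (e.g. a built-in tool call)
    call_id: str

for raw_item in (FunctionCall(name="how_many_jokes"), OpaqueCall(call_id="call_1")):
    # Same fallback as the diff: never raises, prints a placeholder instead.
    print(f"-- Tool was called: {getattr(raw_item, 'name', 'Unknown Tool')}")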

examples/realtime/cli/demo.py

Lines changed: 23 additions & 9 deletions
@@ -25,6 +25,7 @@
 ENERGY_THRESHOLD = 0.015  # RMS threshold for barge‑in while assistant is speaking
 PREBUFFER_CHUNKS = 3  # initial jitter buffer (~120ms with 40ms chunks)
 FADE_OUT_MS = 12  # short fade to avoid clicks when interrupting
+PLAYBACK_ECHO_MARGIN = 0.002  # extra energy above playback echo required to count as speech

 # Set up logging for OpenAI agents SDK
 # logging.basicConfig(
@@ -78,6 +79,7 @@ def __init__(self) -> None:
         self.fade_total_samples = 0
         self.fade_done_samples = 0
         self.fade_samples = int(SAMPLE_RATE * (FADE_OUT_MS / 1000.0))
+        self.playback_rms = 0.0  # smoothed playback energy to filter out echo

     def _output_callback(self, outdata, frames: int, time, status) -> None:
         """Callback for audio output - handles continuous audio stream from server."""
@@ -123,6 +125,7 @@ def _output_callback(self, outdata, frames: int, time, status) -> None:
                 gain = 1.0 - (idx / float(self.fade_total_samples))
                 ramped = np.clip(src * gain, -32768.0, 32767.0).astype(np.int16)
                 outdata[samples_filled : samples_filled + n, 0] = ramped
+                self._update_playback_rms(ramped)

                 # Optionally report played bytes (ramped) to playback tracker
                 try:
@@ -183,6 +186,7 @@ def _output_callback(self, outdata, frames: int, time, status) -> None:
             chunk_data = samples[self.chunk_position : self.chunk_position + samples_to_copy]
             # More efficient: direct assignment for mono audio instead of reshape
             outdata[samples_filled : samples_filled + samples_to_copy, 0] = chunk_data
+            self._update_playback_rms(chunk_data)
             samples_filled += samples_to_copy
             self.chunk_position += samples_to_copy

@@ -273,14 +277,6 @@ async def capture_audio(self) -> None:
         read_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)

         try:
-            # Simple energy-based barge-in: if user speaks while audio is playing, interrupt.
-            def rms_energy(samples: np.ndarray[Any, np.dtype[Any]]) -> float:
-                if samples.size == 0:
-                    return 0.0
-                # Normalize int16 to [-1, 1]
-                x = samples.astype(np.float32) / 32768.0
-                return float(np.sqrt(np.mean(x * x)))
-
             while self.recording:
                 # Check if there's enough data to read
                 if self.audio_stream.read_available < read_size:
@@ -300,7 +296,13 @@ def rms_energy(samples: np.ndarray[Any, np.dtype[Any]]) -> float:
                 if assistant_playing:
                     # Compute RMS energy to detect speech while assistant is talking
                     samples = data.reshape(-1)
-                    if rms_energy(samples) >= ENERGY_THRESHOLD:
+                    mic_rms = self._compute_rms(samples)
+                    # Require the mic to be louder than the echo of the assistant playback.
+                    playback_gate = max(
+                        ENERGY_THRESHOLD,
+                        self.playback_rms * 0.6 + PLAYBACK_ECHO_MARGIN,
+                    )
+                    if mic_rms >= playback_gate:
                         # Locally flush queued assistant audio for snappier interruption.
                         self.interrupt_event.set()
                         await self.session.send_audio(audio_bytes)
@@ -356,6 +358,18 @@ async def _on_event(self, event: RealtimeSessionEvent) -> None:
         except Exception as e:
             print(f"Error processing event: {_truncate_str(str(e), 200)}")

+    def _compute_rms(self, samples: np.ndarray[Any, np.dtype[Any]]) -> float:
+        """Compute RMS energy for int16 samples normalized to [-1, 1]."""
+        if samples.size == 0:
+            return 0.0
+        x = samples.astype(np.float32) / 32768.0
+        return float(np.sqrt(np.mean(x * x)))
+
+    def _update_playback_rms(self, samples: np.ndarray[Any, np.dtype[Any]]) -> None:
+        """Keep a smoothed estimate of playback energy to filter out echo feedback."""
+        sample_rms = self._compute_rms(samples)
+        self.playback_rms = 0.9 * self.playback_rms + 0.1 * sample_rms
+

 if __name__ == "__main__":
     demo = NoUIDemo()
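The barge-in change replaces the fixed energy threshold with a gate derived from a smoothed estimate of the assistant's own playback energy, so speaker echo no longer triggers interruptions. A self-contained sketch of the gate math (constants and the 0.9/0.1 EMA copied from the diff; the synthetic tone and the 0.3/1.5 mic gains are illustrative assumptions):

import numpy as np

ENERGY_THRESHOLD = 0.015
PLAYBACK_ECHO_MARGIN = 0.002

def rms(x: np.ndarray) -> float:
    if x.size == 0:
        return 0.0
    v = x.astype(np.float32) / 32768.0
    return float(np.sqrt(np.mean(v * v)))

# Assistant playback: a steady tone fed through the same EMA as the demo.
tone = (6000 * np.sin(np.linspace(0, 40 * np.pi, 1920))).astype(np.int16)
playback_rms = 0.0
for _ in range(30):  # let the smoothed estimate warm up
    playback_rms = 0.9 * playback_rms + 0.1 * rms(tone)

gate = max(ENERGY_THRESHOLD, playback_rms * 0.6 + PLAYBACK_ECHO_MARGIN)
echo = (tone * 0.3).astype(np.int16)    # mic hears only speaker bleed
voice = (tone * 1.5).astype(np.int16)   # user genuinely talking over playback
print(f"gate={gate:.3f}  echo={rms(echo):.3f}  barge-in: {rms(echo) >= gate}")     # False
print(f"gate={gate:.3f}  voice={rms(voice):.3f}  barge-in: {rms(voice) >= gate}")  # True

One caveat of the 0.9/0.1 smoothing: the estimate takes a couple dozen chunks to warm up, so very early playback echo can still slip past the gate until `playback_rms` catches up.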

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,13 +1,13 @@
 [project]
 name = "openai-agents"
-version = "0.6.2"
+version = "0.6.3"
 description = "OpenAI Agents SDK"
 readme = "README.md"
 requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "support@openai.com" }]
 dependencies = [
-    "openai>=2.8.0,<3",
+    "openai>=2.9.0,<3",
     "pydantic>=2.12.3, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",

src/agents/_run_impl.py

Lines changed: 13 additions & 3 deletions
@@ -1826,7 +1826,10 @@ async def execute(
         output_text = ""

         try:
-            operation = _coerce_apply_patch_operation(call.tool_call)
+            operation = _coerce_apply_patch_operation(
+                call.tool_call,
+                context_wrapper=context_wrapper,
+            )
             editor = apply_patch_tool.editor
             if operation.type == "create_file":
                 result = editor.create_file(operation)
@@ -2093,7 +2096,9 @@ def _extract_apply_patch_call_id(tool_call: Any) -> str:
     return str(value)


-def _coerce_apply_patch_operation(tool_call: Any) -> ApplyPatchOperation:
+def _coerce_apply_patch_operation(
+    tool_call: Any, *, context_wrapper: RunContextWrapper[Any]
+) -> ApplyPatchOperation:
     raw_operation = _get_mapping_or_attr(tool_call, "operation")
     if raw_operation is None:
         raise ModelBehaviorError("Apply patch call is missing an operation payload.")
@@ -2117,7 +2122,12 @@ def _coerce_apply_patch_operation(tool_call: Any) -> ApplyPatchOperation:
     else:
         diff = None

-    return ApplyPatchOperation(type=op_type_literal, path=str(path), diff=diff)
+    return ApplyPatchOperation(
+        type=op_type_literal,
+        path=str(path),
+        diff=diff,
+        ctx_wrapper=context_wrapper,
+    )


 def _normalize_apply_patch_result(

src/agents/editor.py

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 from dataclasses import dataclass
 from typing import Literal, Protocol, runtime_checkable

+from .run_context import RunContextWrapper
 from .util._types import MaybeAwaitable

 ApplyPatchOperationType = Literal["create_file", "update_file", "delete_file"]
@@ -18,6 +19,7 @@ class ApplyPatchOperation:
     type: ApplyPatchOperationType
     path: str
     diff: str | None = None
+    ctx_wrapper: RunContextWrapper | None = None


 @dataclass(**_DATACLASS_KWARGS)
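With `ctx_wrapper` on the operation, a custom editor can consult the active run context when applying patches. A hedged sketch of what this enables (the `DeployContext` type and its dry-run policy are invented for illustration; only `ApplyPatchOperation.ctx_wrapper` comes from this diff):

from dataclasses import dataclass

from agents.editor import ApplyPatchOperation

@dataclass
class DeployContext:  # hypothetical user-defined run context
    dry_run: bool

class AuditingEditor:
    """Editor sketch that consults the run context carried by the operation."""

    def create_file(self, operation: ApplyPatchOperation) -> str:
        ctx = operation.ctx_wrapper.context if operation.ctx_wrapper else None
        if isinstance(ctx, DeployContext) and ctx.dry_run:
            return f"[dry-run] would create {operation.path}"
        # ... write operation.diff to operation.path here ...
        return f"Created {operation.path}"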

src/agents/models/chatcmpl_helpers.py

Lines changed: 57 additions & 0 deletions
@@ -3,6 +3,12 @@
 from contextvars import ContextVar

 from openai import AsyncOpenAI
+from openai.types.chat.chat_completion_token_logprob import ChatCompletionTokenLogprob
+from openai.types.responses.response_output_text import Logprob, LogprobTopLogprob
+from openai.types.responses.response_text_delta_event import (
+    Logprob as DeltaLogprob,
+    LogprobTopLogprob as DeltaTopLogprob,
+)

 from ..model_settings import ModelSettings
 from ..version import __version__
@@ -41,3 +47,54 @@ def get_stream_options_param(
         )
         stream_options = {"include_usage": include_usage} if include_usage is not None else None
         return stream_options
+
+    @classmethod
+    def convert_logprobs_for_output_text(
+        cls, logprobs: list[ChatCompletionTokenLogprob] | None
+    ) -> list[Logprob] | None:
+        if not logprobs:
+            return None
+
+        converted: list[Logprob] = []
+        for token_logprob in logprobs:
+            converted.append(
+                Logprob(
+                    token=token_logprob.token,
+                    logprob=token_logprob.logprob,
+                    bytes=token_logprob.bytes or [],
+                    top_logprobs=[
+                        LogprobTopLogprob(
+                            token=top_logprob.token,
+                            logprob=top_logprob.logprob,
+                            bytes=top_logprob.bytes or [],
+                        )
+                        for top_logprob in token_logprob.top_logprobs
+                    ],
+                )
+            )
+        return converted
+
+    @classmethod
+    def convert_logprobs_for_text_delta(
+        cls, logprobs: list[ChatCompletionTokenLogprob] | None
+    ) -> list[DeltaLogprob] | None:
+        if not logprobs:
+            return None
+
+        converted: list[DeltaLogprob] = []
+        for token_logprob in logprobs:
+            converted.append(
+                DeltaLogprob(
+                    token=token_logprob.token,
+                    logprob=token_logprob.logprob,
+                    top_logprobs=[
+                        DeltaTopLogprob(
+                            token=top_logprob.token,
+                            logprob=top_logprob.logprob,
+                        )
+                        for top_logprob in token_logprob.top_logprobs
+                    ]
+                    or None,
+                )
+            )
+        return converted
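A rough usage sketch for the two converters, using hand-built Chat Completions logprob objects (token and logprob values are illustrative):

from openai.types.chat.chat_completion_token_logprob import (
    ChatCompletionTokenLogprob,
    TopLogprob,
)

from agents.models.chatcmpl_helpers import ChatCmplHelpers

chat_logprobs = [
    ChatCompletionTokenLogprob(
        token="Hello",
        logprob=-0.12,
        bytes=[72, 101, 108, 108, 111],
        top_logprobs=[TopLogprob(token="Hello", logprob=-0.12, bytes=None)],
    )
]

# Responses-style output_text logprobs keep byte offsets (None coerced to []).
print(ChatCmplHelpers.convert_logprobs_for_output_text(chat_logprobs))
# Delta-event logprobs are built without `bytes`, matching the event type above.
print(ChatCmplHelpers.convert_logprobs_for_text_delta(chat_logprobs))
# Empty/None input passes through as None rather than [].
print(ChatCmplHelpers.convert_logprobs_for_output_text(None))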

src/agents/models/chatcmpl_stream_handler.py

Lines changed: 17 additions & 1 deletion
@@ -42,6 +42,7 @@
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

 from ..items import TResponseStreamEvent
+from .chatcmpl_helpers import ChatCmplHelpers
 from .fake_id import FAKE_RESPONSES_ID


@@ -105,6 +106,7 @@ async def handle_stream(
             continue

         delta = chunk.choices[0].delta
+        choice_logprobs = chunk.choices[0].logprobs

         # Handle thinking blocks from Anthropic (for preserving signatures)
         if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
@@ -266,6 +268,15 @@ async def handle_stream(
                     type="response.content_part.added",
                     sequence_number=sequence_number.get_and_increment(),
                 )
+            delta_logprobs = (
+                ChatCmplHelpers.convert_logprobs_for_text_delta(
+                    choice_logprobs.content if choice_logprobs else None
+                )
+                or []
+            )
+            output_logprobs = ChatCmplHelpers.convert_logprobs_for_output_text(
+                choice_logprobs.content if choice_logprobs else None
+            )
             # Emit the delta for this segment of content
             yield ResponseTextDeltaEvent(
                 content_index=state.text_content_index_and_output[0],
@@ -275,10 +286,15 @@ async def handle_stream(
                 is not None,  # fixed 0 -> 0 or 1
                 type="response.output_text.delta",
                 sequence_number=sequence_number.get_and_increment(),
-                logprobs=[],
+                logprobs=delta_logprobs,
             )
             # Accumulate the text into the response part
             state.text_content_index_and_output[1].text += delta.content
+            if output_logprobs:
+                existing_logprobs = state.text_content_index_and_output[1].logprobs or []
+                state.text_content_index_and_output[1].logprobs = (
+                    existing_logprobs + output_logprobs
+                )

             # Handle refusals (model declines to answer)
             # This is always set by the OpenAI API, but not by others e.g. LiteLLM
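With this change, `response.output_text.delta` events from Chat Completions models can carry per-token logprobs instead of an empty list. A hedged consumer-side sketch, assuming the underlying request had logprobs enabled and using the public streaming API:

import math

from agents import Agent, Runner

async def sequence_confidence(agent: Agent, prompt: str) -> float:
    """Sum token logprobs across text deltas; exp() of the sum approximates
    the probability the model assigned to the whole completion."""
    result = Runner.run_streamed(agent, input=prompt)
    total = 0.0
    async for event in result.stream_events():
        if event.type == "raw_response_event" and event.data.type == "response.output_text.delta":
            total += sum(lp.logprob for lp in event.data.logprobs or [])
    return math.exp(total)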

src/agents/models/openai_chatcompletions.py

Lines changed: 28 additions & 1 deletion
@@ -9,7 +9,13 @@
 from openai.types import ChatModel
 from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage
 from openai.types.chat.chat_completion import Choice
-from openai.types.responses import Response
+from openai.types.responses import (
+    Response,
+    ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseOutputText,
+)
+from openai.types.responses.response_output_text import Logprob
 from openai.types.responses.response_prompt_param import ResponsePromptParam

 from .. import _debug
@@ -119,12 +125,33 @@ async def get_response(

         items = Converter.message_to_output_items(message) if message is not None else []

+        logprob_models = None
+        if first_choice and first_choice.logprobs and first_choice.logprobs.content:
+            logprob_models = ChatCmplHelpers.convert_logprobs_for_output_text(
+                first_choice.logprobs.content
+            )
+
+        if logprob_models:
+            self._attach_logprobs_to_output(items, logprob_models)
+
         return ModelResponse(
             output=items,
             usage=usage,
             response_id=None,
         )

+    def _attach_logprobs_to_output(
+        self, output_items: list[ResponseOutputItem], logprobs: list[Logprob]
+    ) -> None:
+        for output_item in output_items:
+            if not isinstance(output_item, ResponseOutputMessage):
+                continue
+
+            for content in output_item.content:
+                if isinstance(content, ResponseOutputText):
+                    content.logprobs = logprobs
+                    return
+
     async def stream_response(
         self,
         system_instructions: str | None,
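On the non-streaming path, the converted logprobs are attached to the first text content of the first output message, mirroring `_attach_logprobs_to_output`. A sketch of reading them back (assumes `model_response` came from `get_response(...)` on this model with logprobs requested):

from openai.types.responses import ResponseOutputMessage, ResponseOutputText

for item in model_response.output:
    if isinstance(item, ResponseOutputMessage):
        for content in item.content:
            if isinstance(content, ResponseOutputText) and content.logprobs:
                for lp in content.logprobs:
                    print(f"{lp.token!r}: logprob {lp.logprob:.3f}")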

tests/test_apply_patch_tool.py

Lines changed: 2 additions & 0 deletions
@@ -63,6 +63,7 @@ async def test_apply_patch_tool_success() -> None:
     assert raw_item["status"] == "completed"
     assert raw_item["call_id"] == "call_apply"
     assert editor.operations[0].type == "update_file"
+    assert editor.operations[0].ctx_wrapper is context_wrapper
     assert isinstance(raw_item["output"], str)
     assert raw_item["output"].startswith("Updated tasks.md")
     input_payload = result.to_input_item()
@@ -137,3 +138,4 @@ async def test_apply_patch_tool_accepts_mapping_call() -> None:
     raw_item = cast(dict[str, Any], result.raw_item)
     assert raw_item["call_id"] == "call_mapping"
     assert editor.operations[0].path == "notes.md"
+    assert editor.operations[0].ctx_wrapper is context_wrapper
