diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 0c6e1a8623..986390db98 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.35.1" + ".": "2.36.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index f3c3b26134..9b6dc7e58b 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 233 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-84d31411083374ec6cdb4a722f8b8b83c1230741157306b1ca7ba1a3cf246672.yml -openapi_spec_hash: 051fce676f959b8207e2317225ec4bdc -config_hash: a2916f18a94ff65c8116ca2fe3256f10 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-371f497afe4d6070f6e252e5febbe8f453c7058a8dff0c26a01b4d88442a4ac2.yml +openapi_spec_hash: d39f46e8fda45f77096448105efd175a +config_hash: b64135fff1fe9cf4069b9ecf59ae8b07 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b5e6afda7..af067330f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 2.36.0 (2026-05-07) + +Full Changelog: [v2.35.1...v2.36.0](https://github.com/openai/openai-python/compare/v2.35.1...v2.36.0) + +### Features + +* **api:** manual updates ([13c639c](https://github.com/openai/openai-python/commit/13c639cc7d57e4fbd4406563511e15eeb88a54b2)) +* **api:** realtime 2 ([8fe0ab8](https://github.com/openai/openai-python/commit/8fe0ab87e67eeb3cc27426b50093845229520f0e)) + ## 2.35.1 (2026-05-06) Full Changelog: [v2.35.0...v2.35.1](https://github.com/openai/openai-python/compare/v2.35.0...v2.35.1) diff --git a/pyproject.toml b/pyproject.toml index cb8c01191d..ec1af48c8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "2.35.1" +version = "2.36.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index 7f151cf0cd..a6435eede3 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "2.35.1" # x-release-please-version +__version__ = "2.36.0" # x-release-please-version diff --git a/src/openai/resources/realtime/api.md b/src/openai/resources/realtime/api.md index 1a178384db..2be1b85cbf 100644 --- a/src/openai/resources/realtime/api.md +++ b/src/openai/resources/realtime/api.md @@ -58,6 +58,8 @@ from openai.types.realtime import ( RealtimeMcpToolCall, RealtimeMcpToolExecutionError, RealtimeMcphttpError, + RealtimeReasoning, + RealtimeReasoningEffort, RealtimeResponse, RealtimeResponseCreateAudioOutput, RealtimeResponseCreateMcpTool, @@ -77,6 +79,22 @@ from openai.types.realtime import ( RealtimeTranscriptionSessionAudioInput, RealtimeTranscriptionSessionAudioInputTurnDetection, RealtimeTranscriptionSessionCreateRequest, + RealtimeTranslationClientEvent, + RealtimeTranslationClientSecretCreateRequest, + RealtimeTranslationClientSecretCreateResponse, + RealtimeTranslationInputAudioBufferAppendEvent, + RealtimeTranslationInputTranscriptDeltaEvent, + RealtimeTranslationOutputAudioDeltaEvent, + RealtimeTranslationOutputTranscriptDeltaEvent, + RealtimeTranslationServerEvent, + RealtimeTranslationSession, + RealtimeTranslationSessionCloseEvent, + RealtimeTranslationSessionClosedEvent, + RealtimeTranslationSessionCreateRequest, + RealtimeTranslationSessionCreatedEvent, + RealtimeTranslationSessionUpdateEvent, + RealtimeTranslationSessionUpdateRequest, + RealtimeTranslationSessionUpdatedEvent, RealtimeTruncation, RealtimeTruncationRetentionRatio, ResponseAudioDeltaEvent, @@ -114,7 +132,6 @@ Types: ```python from openai.types.realtime import ( - RealtimeSessionClientSecret, RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse, RealtimeTranscriptionSessionTurnDetection, diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py index 7a4fcc0110..0674b2b010 100644 --- a/src/openai/resources/realtime/calls.py +++ b/src/openai/resources/realtime/calls.py @@ -28,6 +28,7 @@ call_reject_params, ) from ...types.responses.response_prompt_param import ResponsePromptParam +from ...types.realtime.realtime_reasoning_param import RealtimeReasoningParam from ...types.realtime.realtime_truncation_param import RealtimeTruncationParam from ...types.realtime.realtime_audio_config_param import RealtimeAudioConfigParam from ...types.realtime.realtime_tools_config_param import RealtimeToolsConfigParam @@ -121,6 +122,7 @@ def accept( Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -139,7 +141,9 @@ def accept( ] | Omit = omit, output_modalities: List[Literal["text", "audio"]] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, prompt: Optional[ResponsePromptParam] | Omit = omit, + reasoning: RealtimeReasoningParam | Omit = omit, tool_choice: RealtimeToolChoiceConfigParam | Omit = omit, tools: RealtimeToolsConfigParam | Omit = omit, tracing: Optional[RealtimeTracingConfigParam] | Omit = omit, @@ -188,9 +192,14 @@ def accept( can be used to make the model respond with text only. It is not possible to request both `text` and `audio` at the same time. + parallel_tool_calls: Whether the model may call multiple tools in parallel. Only supported by + reasoning Realtime models such as `gpt-realtime-2`. + prompt: Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + reasoning: Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`. + tool_choice: How the model chooses tools. Provide one of the string modes or force a specific function/MCP tool. @@ -245,7 +254,9 @@ def accept( "max_output_tokens": max_output_tokens, "model": model, "output_modalities": output_modalities, + "parallel_tool_calls": parallel_tool_calls, "prompt": prompt, + "reasoning": reasoning, "tool_choice": tool_choice, "tools": tools, "tracing": tracing, @@ -471,6 +482,7 @@ async def accept( Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -489,7 +501,9 @@ async def accept( ] | Omit = omit, output_modalities: List[Literal["text", "audio"]] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, prompt: Optional[ResponsePromptParam] | Omit = omit, + reasoning: RealtimeReasoningParam | Omit = omit, tool_choice: RealtimeToolChoiceConfigParam | Omit = omit, tools: RealtimeToolsConfigParam | Omit = omit, tracing: Optional[RealtimeTracingConfigParam] | Omit = omit, @@ -538,9 +552,14 @@ async def accept( can be used to make the model respond with text only. It is not possible to request both `text` and `audio` at the same time. + parallel_tool_calls: Whether the model may call multiple tools in parallel. Only supported by + reasoning Realtime models such as `gpt-realtime-2`. + prompt: Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + reasoning: Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`. + tool_choice: How the model chooses tools. Provide one of the string modes or force a specific function/MCP tool. @@ -595,7 +614,9 @@ async def accept( "max_output_tokens": max_output_tokens, "model": model, "output_modalities": output_modalities, + "parallel_tool_calls": parallel_tool_calls, "prompt": prompt, + "reasoning": reasoning, "tool_choice": tool_choice, "tools": tools, "tracing": tracing, diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py index c2a141d727..d7a087ba9a 100644 --- a/src/openai/types/realtime/__init__.py +++ b/src/openai/types/realtime/__init__.py @@ -9,6 +9,7 @@ from .call_accept_params import CallAcceptParams as CallAcceptParams from .call_create_params import CallCreateParams as CallCreateParams from .call_reject_params import CallRejectParams as CallRejectParams +from .realtime_reasoning import RealtimeReasoning as RealtimeReasoning from .audio_transcription import AudioTranscription as AudioTranscription from .log_prob_properties import LogProbProperties as LogProbProperties from .realtime_truncation import RealtimeTruncation as RealtimeTruncation @@ -38,11 +39,13 @@ from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage from .realtime_tracing_config import RealtimeTracingConfig as RealtimeTracingConfig from .mcp_list_tools_completed import McpListToolsCompleted as McpListToolsCompleted +from .realtime_reasoning_param import RealtimeReasoningParam as RealtimeReasoningParam from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus from .response_mcp_call_failed import ResponseMcpCallFailed as ResponseMcpCallFailed from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent from .audio_transcription_param import AudioTranscriptionParam as AudioTranscriptionParam from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent +from .realtime_reasoning_effort import RealtimeReasoningEffort as RealtimeReasoningEffort from .realtime_truncation_param import RealtimeTruncationParam as RealtimeTruncationParam from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent @@ -75,7 +78,6 @@ from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse -from .realtime_session_client_secret import RealtimeSessionClientSecret as RealtimeSessionClientSecret from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent diff --git a/src/openai/types/realtime/audio_transcription.py b/src/openai/types/realtime/audio_transcription.py index 0a8c1371e0..45e2e388ca 100644 --- a/src/openai/types/realtime/audio_transcription.py +++ b/src/openai/types/realtime/audio_transcription.py @@ -9,6 +9,13 @@ class AudioTranscription(BaseModel): + delay: Optional[Literal["minimal", "low", "medium", "high", "xhigh"]] = None + """ + Controls how long the model waits before emitting transcription text. Higher + values can improve transcription accuracy at the cost of latency. Only supported + with `gpt-realtime-whisper` in GA Realtime sessions. + """ + language: Optional[str] = None """The language of the input audio. @@ -25,15 +32,16 @@ class AudioTranscription(BaseModel): "gpt-4o-mini-transcribe-2025-12-15", "gpt-4o-transcribe", "gpt-4o-transcribe-diarize", + "gpt-realtime-whisper", ], None, ] = None """The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, - `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and - `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need - diarization with speaker labels. + `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, + `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use + `gpt-4o-transcribe-diarize` when you need diarization with speaker labels. """ prompt: Optional[str] = None @@ -43,4 +51,5 @@ class AudioTranscription(BaseModel): [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology". + Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions. """ diff --git a/src/openai/types/realtime/audio_transcription_param.py b/src/openai/types/realtime/audio_transcription_param.py index 7e60a003ce..9206921542 100644 --- a/src/openai/types/realtime/audio_transcription_param.py +++ b/src/openai/types/realtime/audio_transcription_param.py @@ -9,6 +9,13 @@ class AudioTranscriptionParam(TypedDict, total=False): + delay: Literal["minimal", "low", "medium", "high", "xhigh"] + """ + Controls how long the model waits before emitting transcription text. Higher + values can improve transcription accuracy at the cost of latency. Only supported + with `gpt-realtime-whisper` in GA Realtime sessions. + """ + language: str """The language of the input audio. @@ -25,14 +32,15 @@ class AudioTranscriptionParam(TypedDict, total=False): "gpt-4o-mini-transcribe-2025-12-15", "gpt-4o-transcribe", "gpt-4o-transcribe-diarize", + "gpt-realtime-whisper", ], ] """The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, - `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and - `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need - diarization with speaker labels. + `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, + `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use + `gpt-4o-transcribe-diarize` when you need diarization with speaker labels. """ prompt: str @@ -42,4 +50,5 @@ class AudioTranscriptionParam(TypedDict, total=False): [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology". + Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions. """ diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py index 1baddbfc2c..b4a48fc8b5 100644 --- a/src/openai/types/realtime/call_accept_params.py +++ b/src/openai/types/realtime/call_accept_params.py @@ -5,6 +5,7 @@ from typing import List, Union, Optional from typing_extensions import Literal, Required, TypedDict +from .realtime_reasoning_param import RealtimeReasoningParam from .realtime_truncation_param import RealtimeTruncationParam from .realtime_audio_config_param import RealtimeAudioConfigParam from .realtime_tools_config_param import RealtimeToolsConfigParam @@ -57,6 +58,7 @@ class CallAcceptParams(TypedDict, total=False): Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -83,12 +85,21 @@ class CallAcceptParams(TypedDict, total=False): only. It is not possible to request both `text` and `audio` at the same time. """ + parallel_tool_calls: bool + """Whether the model may call multiple tools in parallel. + + Only supported by reasoning Realtime models such as `gpt-realtime-2`. + """ + prompt: Optional[ResponsePromptParam] """ Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: RealtimeReasoningParam + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: RealtimeToolChoiceConfigParam """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_audio_config_input.py b/src/openai/types/realtime/realtime_audio_config_input.py index 08e1b14601..ba7d211d0d 100644 --- a/src/openai/types/realtime/realtime_audio_config_input.py +++ b/src/openai/types/realtime/realtime_audio_config_input.py @@ -67,4 +67,7 @@ class RealtimeAudioConfigInput(BaseModel): trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency. + + For `gpt-realtime-whisper` transcription sessions, turn detection must be set to + `null`; VAD is not supported. """ diff --git a/src/openai/types/realtime/realtime_audio_config_input_param.py b/src/openai/types/realtime/realtime_audio_config_input_param.py index 73495e6cd3..5cea9f0efe 100644 --- a/src/openai/types/realtime/realtime_audio_config_input_param.py +++ b/src/openai/types/realtime/realtime_audio_config_input_param.py @@ -69,4 +69,7 @@ class RealtimeAudioConfigInputParam(TypedDict, total=False): trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency. + + For `gpt-realtime-whisper` transcription sessions, turn detection must be set to + `null`; VAD is not supported. """ diff --git a/src/openai/types/realtime/realtime_reasoning.py b/src/openai/types/realtime/realtime_reasoning.py new file mode 100644 index 0000000000..5d49c9bd0b --- /dev/null +++ b/src/openai/types/realtime/realtime_reasoning.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .realtime_reasoning_effort import RealtimeReasoningEffort + +__all__ = ["RealtimeReasoning"] + + +class RealtimeReasoning(BaseModel): + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + + effort: Optional[RealtimeReasoningEffort] = None + """ + Constrains effort on reasoning for reasoning-capable Realtime models such as + `gpt-realtime-2`. + """ diff --git a/src/openai/types/realtime/realtime_reasoning_effort.py b/src/openai/types/realtime/realtime_reasoning_effort.py new file mode 100644 index 0000000000..dcf9e303a7 --- /dev/null +++ b/src/openai/types/realtime/realtime_reasoning_effort.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["RealtimeReasoningEffort"] + +RealtimeReasoningEffort: TypeAlias = Literal["minimal", "low", "medium", "high", "xhigh"] diff --git a/src/openai/types/realtime/realtime_reasoning_param.py b/src/openai/types/realtime/realtime_reasoning_param.py new file mode 100644 index 0000000000..f6de89c99a --- /dev/null +++ b/src/openai/types/realtime/realtime_reasoning_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .realtime_reasoning_effort import RealtimeReasoningEffort + +__all__ = ["RealtimeReasoningParam"] + + +class RealtimeReasoningParam(TypedDict, total=False): + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + + effort: RealtimeReasoningEffort + """ + Constrains effort on reasoning for reasoning-capable Realtime models such as + `gpt-realtime-2`. + """ diff --git a/src/openai/types/realtime/realtime_response_create_params.py b/src/openai/types/realtime/realtime_response_create_params.py index deec8c9280..8e20168e34 100644 --- a/src/openai/types/realtime/realtime_response_create_params.py +++ b/src/openai/types/realtime/realtime_response_create_params.py @@ -6,6 +6,7 @@ from ..._models import BaseModel from ..shared.metadata import Metadata from .conversation_item import ConversationItem +from .realtime_reasoning import RealtimeReasoning from .realtime_function_tool import RealtimeFunctionTool from ..responses.response_prompt import ResponsePrompt from ..responses.tool_choice_mcp import ToolChoiceMcp @@ -84,12 +85,21 @@ class RealtimeResponseCreateParams(BaseModel): model. """ + parallel_tool_calls: Optional[bool] = None + """Whether the model may call multiple tools in parallel. + + Only supported by reasoning Realtime models such as `gpt-realtime-2`. + """ + prompt: Optional[ResponsePrompt] = None """ Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: Optional[RealtimeReasoning] = None + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: Optional[ToolChoice] = None """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_response_create_params_param.py b/src/openai/types/realtime/realtime_response_create_params_param.py index caad5bc900..aafa3d4e25 100644 --- a/src/openai/types/realtime/realtime_response_create_params_param.py +++ b/src/openai/types/realtime/realtime_response_create_params_param.py @@ -7,6 +7,7 @@ from ..shared_params.metadata import Metadata from .conversation_item_param import ConversationItemParam +from .realtime_reasoning_param import RealtimeReasoningParam from .realtime_function_tool_param import RealtimeFunctionToolParam from ..responses.tool_choice_options import ToolChoiceOptions from ..responses.response_prompt_param import ResponsePromptParam @@ -85,12 +86,21 @@ class RealtimeResponseCreateParamsParam(TypedDict, total=False): model. """ + parallel_tool_calls: bool + """Whether the model may call multiple tools in parallel. + + Only supported by reasoning Realtime models such as `gpt-realtime-2`. + """ + prompt: Optional[ResponsePromptParam] """ Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: RealtimeReasoningParam + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: ToolChoice """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_session_client_secret.py b/src/openai/types/realtime/realtime_session_client_secret.py deleted file mode 100644 index 13a12f5502..0000000000 --- a/src/openai/types/realtime/realtime_session_client_secret.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from ..._models import BaseModel - -__all__ = ["RealtimeSessionClientSecret"] - - -class RealtimeSessionClientSecret(BaseModel): - """Ephemeral key returned by the API.""" - - expires_at: int - """Timestamp for when the token expires. - - Currently, all tokens expire after one minute. - """ - - value: str - """ - Ephemeral key usable in client environments to authenticate connections to the - Realtime API. Use this in client-side environments rather than a standard API - token, which should only be used server-side. - """ diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py index 163a0d16d8..cf681e99a1 100644 --- a/src/openai/types/realtime/realtime_session_create_request.py +++ b/src/openai/types/realtime/realtime_session_create_request.py @@ -4,6 +4,7 @@ from typing_extensions import Literal from ..._models import BaseModel +from .realtime_reasoning import RealtimeReasoning from .realtime_truncation import RealtimeTruncation from .realtime_audio_config import RealtimeAudioConfig from .realtime_tools_config import RealtimeToolsConfig @@ -58,6 +59,7 @@ class RealtimeSessionCreateRequest(BaseModel): Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -85,12 +87,21 @@ class RealtimeSessionCreateRequest(BaseModel): only. It is not possible to request both `text` and `audio` at the same time. """ + parallel_tool_calls: Optional[bool] = None + """Whether the model may call multiple tools in parallel. + + Only supported by reasoning Realtime models such as `gpt-realtime-2`. + """ + prompt: Optional[ResponsePrompt] = None """ Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: Optional[RealtimeReasoning] = None + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: Optional[RealtimeToolChoiceConfig] = None """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py index 19c73b909b..ab7de47c3c 100644 --- a/src/openai/types/realtime/realtime_session_create_request_param.py +++ b/src/openai/types/realtime/realtime_session_create_request_param.py @@ -5,6 +5,7 @@ from typing import List, Union, Optional from typing_extensions import Literal, Required, TypedDict +from .realtime_reasoning_param import RealtimeReasoningParam from .realtime_truncation_param import RealtimeTruncationParam from .realtime_audio_config_param import RealtimeAudioConfigParam from .realtime_tools_config_param import RealtimeToolsConfigParam @@ -59,6 +60,7 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False): Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -85,12 +87,21 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False): only. It is not possible to request both `text` and `audio` at the same time. """ + parallel_tool_calls: bool + """Whether the model may call multiple tools in parallel. + + Only supported by reasoning Realtime models such as `gpt-realtime-2`. + """ + prompt: Optional[ResponsePromptParam] """ Reference to a prompt template and its variables. [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: RealtimeReasoningParam + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: RealtimeToolChoiceConfigParam """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py index e2ed5ddce5..7193eafcc1 100644 --- a/src/openai/types/realtime/realtime_session_create_response.py +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -5,6 +5,7 @@ from ..._utils import PropertyInfo from ..._models import BaseModel +from .realtime_reasoning import RealtimeReasoning from .audio_transcription import AudioTranscription from .realtime_truncation import RealtimeTruncation from .noise_reduction_type import NoiseReductionType @@ -13,7 +14,6 @@ from ..responses.response_prompt import ResponsePrompt from ..responses.tool_choice_mcp import ToolChoiceMcp from ..responses.tool_choice_options import ToolChoiceOptions -from .realtime_session_client_secret import RealtimeSessionClientSecret from ..responses.tool_choice_function import ToolChoiceFunction __all__ = [ @@ -176,16 +176,6 @@ class AudioInput(BaseModel): """ transcription: Optional[AudioTranscription] = None - """ - Configuration for input audio transcription, defaults to off and can be set to - `null` to turn off once on. Input audio transcription is not native to the - model, since the model consumes audio directly. Transcription runs - asynchronously through - [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - and should be treated as guidance of input audio content rather than precisely - what the model heard. The client can optionally set the language and prompt for - transcription, these offer additional guidance to the transcription service. - """ turn_detection: Optional[AudioInputTurnDetection] = None """Configuration for turn detection, ether Server VAD or Semantic VAD. @@ -202,6 +192,9 @@ class AudioInput(BaseModel): trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency. + + For `gpt-realtime-whisper` transcription sessions, turn detection must be set to + `null`; VAD is not supported. """ @@ -414,14 +407,13 @@ class TracingTracingConfiguration(BaseModel): class RealtimeSessionCreateResponse(BaseModel): - """A new Realtime session configuration, with an ephemeral key. + """A Realtime session configuration object.""" - Default TTL - for keys is one minute. - """ + id: str + """Unique identifier for the session that looks like `sess_1234567890abcdef`.""" - client_secret: RealtimeSessionClientSecret - """Ephemeral key returned by the API.""" + object: Literal["realtime.session"] + """The object type. Always `realtime.session`.""" type: Literal["realtime"] """The type of session to create. Always `realtime` for the Realtime API.""" @@ -429,6 +421,9 @@ class RealtimeSessionCreateResponse(BaseModel): audio: Optional[Audio] = None """Configuration for input and output audio.""" + expires_at: Optional[int] = None + """Expiration timestamp for the session, in seconds since epoch.""" + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None """Additional fields to include in server outputs. @@ -464,6 +459,7 @@ class RealtimeSessionCreateResponse(BaseModel): Literal[ "gpt-realtime", "gpt-realtime-1.5", + "gpt-realtime-2", "gpt-realtime-2025-08-28", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", @@ -497,6 +493,9 @@ class RealtimeSessionCreateResponse(BaseModel): [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). """ + reasoning: Optional[RealtimeReasoning] = None + """Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.""" + tool_choice: Optional[ToolChoice] = None """How the model chooses tools. diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input.py b/src/openai/types/realtime/realtime_transcription_session_audio_input.py index 80ff223590..e6f044bc22 100644 --- a/src/openai/types/realtime/realtime_transcription_session_audio_input.py +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input.py @@ -69,4 +69,7 @@ class RealtimeTranscriptionSessionAudioInput(BaseModel): trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency. + + For `gpt-realtime-whisper` transcription sessions, turn detection must be set to + `null`; VAD is not supported. """ diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py index dd908c72f6..cacb38dc22 100644 --- a/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py @@ -71,4 +71,7 @@ class RealtimeTranscriptionSessionAudioInputParam(TypedDict, total=False): trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency. + + For `gpt-realtime-whisper` transcription sessions, turn detection must be set to + `null`; VAD is not supported. """ diff --git a/src/openai/types/realtime/realtime_transcription_session_create_response.py b/src/openai/types/realtime/realtime_transcription_session_create_response.py index 6ca6c3808b..68f7e71a54 100644 --- a/src/openai/types/realtime/realtime_transcription_session_create_response.py +++ b/src/openai/types/realtime/realtime_transcription_session_create_response.py @@ -31,14 +31,13 @@ class AudioInput(BaseModel): """Configuration for input audio noise reduction.""" transcription: Optional[AudioTranscription] = None - """Configuration of the transcription model.""" turn_detection: Optional[RealtimeTranscriptionSessionTurnDetection] = None """Configuration for turn detection. Can be set to `null` to turn off. Server VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user - speech. + speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported. """ diff --git a/src/openai/types/realtime/realtime_transcription_session_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_turn_detection.py index 8dacd60a07..0f3d5b1c00 100644 --- a/src/openai/types/realtime/realtime_transcription_session_turn_detection.py +++ b/src/openai/types/realtime/realtime_transcription_session_turn_detection.py @@ -12,7 +12,7 @@ class RealtimeTranscriptionSessionTurnDetection(BaseModel): Can be set to `null` to turn off. Server VAD means that the model will detect the start and end of speech based on - audio volume and respond at the end of user speech. + audio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported. """ prefix_padding_ms: Optional[int] = None diff --git a/tests/api_resources/realtime/test_calls.py b/tests/api_resources/realtime/test_calls.py index 43ab9afe01..3d87a77f3b 100644 --- a/tests/api_resources/realtime/test_calls.py +++ b/tests/api_resources/realtime/test_calls.py @@ -47,6 +47,7 @@ def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRou }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -75,11 +76,13 @@ def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRou "max_output_tokens": "inf", "model": "gpt-realtime", "output_modalities": ["text"], + "parallel_tool_calls": True, "prompt": { "id": "id", "variables": {"foo": "string"}, "version": "version", }, + "reasoning": {"effort": "minimal"}, "tool_choice": "none", "tools": [ { @@ -146,6 +149,7 @@ def test_method_accept_with_all_params(self, client: OpenAI) -> None: }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -174,11 +178,13 @@ def test_method_accept_with_all_params(self, client: OpenAI) -> None: max_output_tokens="inf", model="gpt-realtime", output_modalities=["text"], + parallel_tool_calls=True, prompt={ "id": "id", "variables": {"foo": "string"}, "version": "version", }, + reasoning={"effort": "minimal"}, tool_choice="none", tools=[ { @@ -385,6 +391,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, re }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -413,11 +420,13 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, re "max_output_tokens": "inf", "model": "gpt-realtime", "output_modalities": ["text"], + "parallel_tool_calls": True, "prompt": { "id": "id", "variables": {"foo": "string"}, "version": "version", }, + "reasoning": {"effort": "minimal"}, "tool_choice": "none", "tools": [ { @@ -484,6 +493,7 @@ async def test_method_accept_with_all_params(self, async_client: AsyncOpenAI) -> }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -512,11 +522,13 @@ async def test_method_accept_with_all_params(self, async_client: AsyncOpenAI) -> max_output_tokens="inf", model="gpt-realtime", output_modalities=["text"], + parallel_tool_calls=True, prompt={ "id": "id", "variables": {"foo": "string"}, "version": "version", }, + reasoning={"effort": "minimal"}, tool_choice="none", tools=[ { diff --git a/tests/api_resources/realtime/test_client_secrets.py b/tests/api_resources/realtime/test_client_secrets.py index a354019eac..c8ec8f3d3b 100644 --- a/tests/api_resources/realtime/test_client_secrets.py +++ b/tests/api_resources/realtime/test_client_secrets.py @@ -39,6 +39,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -67,11 +68,13 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: "max_output_tokens": "inf", "model": "gpt-realtime", "output_modalities": ["text"], + "parallel_tool_calls": True, "prompt": { "id": "id", "variables": {"foo": "string"}, "version": "version", }, + "reasoning": {"effort": "minimal"}, "tool_choice": "none", "tools": [ { @@ -135,6 +138,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> }, "noise_reduction": {"type": "near_field"}, "transcription": { + "delay": "minimal", "language": "language", "model": "whisper-1", "prompt": "prompt", @@ -163,11 +167,13 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> "max_output_tokens": "inf", "model": "gpt-realtime", "output_modalities": ["text"], + "parallel_tool_calls": True, "prompt": { "id": "id", "variables": {"foo": "string"}, "version": "version", }, + "reasoning": {"effort": "minimal"}, "tool_choice": "none", "tools": [ {