@@ -46,18 +46,19 @@ def with_streaming_response(self) -> SessionsWithStreamingResponse:
4646 def create (
4747 self ,
4848 * ,
49+ input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
50+ input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
51+ instructions : str | NotGiven = NOT_GIVEN ,
52+ max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
53+ modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
4954 model : Literal [
5055 "gpt-4o-realtime-preview" ,
5156 "gpt-4o-realtime-preview-2024-10-01" ,
5257 "gpt-4o-realtime-preview-2024-12-17" ,
5358 "gpt-4o-mini-realtime-preview" ,
5459 "gpt-4o-mini-realtime-preview-2024-12-17" ,
55- ],
56- input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
57- input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
58- instructions : str | NotGiven = NOT_GIVEN ,
59- max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
60- modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
60+ ]
61+ | NotGiven = NOT_GIVEN ,
6162 output_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
6263 temperature : float | NotGiven = NOT_GIVEN ,
6364 tool_choice : str | NotGiven = NOT_GIVEN ,
@@ -81,9 +82,9 @@ def create(
8182 the Realtime API.
8283
8384 Args:
84- model : The Realtime model used for this session.
85-
86- input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw` .
85+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
86+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
87+ (mono), and little-endian byte order.
8788
8889 input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
8990 `null` to turn off once on. Input audio transcription is not native to the
@@ -110,7 +111,10 @@ def create(
110111 modalities: The set of modalities the model can respond with. To disable audio, set this to
111112 ["text"].
112113
114+ model: The Realtime model used for this session.
115+
113116 output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
117+ For `pcm16`, output audio is sampled at a rate of 24kHz.
114118
115119 temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
116120
@@ -140,12 +144,12 @@ def create(
140144 "/realtime/sessions" ,
141145 body = maybe_transform (
142146 {
143- "model" : model ,
144147 "input_audio_format" : input_audio_format ,
145148 "input_audio_transcription" : input_audio_transcription ,
146149 "instructions" : instructions ,
147150 "max_response_output_tokens" : max_response_output_tokens ,
148151 "modalities" : modalities ,
152+ "model" : model ,
149153 "output_audio_format" : output_audio_format ,
150154 "temperature" : temperature ,
151155 "tool_choice" : tool_choice ,
@@ -185,18 +189,19 @@ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
185189 async def create (
186190 self ,
187191 * ,
192+ input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
193+ input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
194+ instructions : str | NotGiven = NOT_GIVEN ,
195+ max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
196+ modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
188197 model : Literal [
189198 "gpt-4o-realtime-preview" ,
190199 "gpt-4o-realtime-preview-2024-10-01" ,
191200 "gpt-4o-realtime-preview-2024-12-17" ,
192201 "gpt-4o-mini-realtime-preview" ,
193202 "gpt-4o-mini-realtime-preview-2024-12-17" ,
194- ],
195- input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
196- input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
197- instructions : str | NotGiven = NOT_GIVEN ,
198- max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
199- modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
203+ ]
204+ | NotGiven = NOT_GIVEN ,
200205 output_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
201206 temperature : float | NotGiven = NOT_GIVEN ,
202207 tool_choice : str | NotGiven = NOT_GIVEN ,
@@ -220,9 +225,9 @@ async def create(
220225 the Realtime API.
221226
222227 Args:
223- model : The Realtime model used for this session.
224-
225- input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw` .
228+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
229+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
230+ (mono), and little-endian byte order.
226231
227232 input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
228233 `null` to turn off once on. Input audio transcription is not native to the
@@ -249,7 +254,10 @@ async def create(
249254 modalities: The set of modalities the model can respond with. To disable audio, set this to
250255 ["text"].
251256
257+ model: The Realtime model used for this session.
258+
252259 output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
260+ For `pcm16`, output audio is sampled at a rate of 24kHz.
253261
254262 temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
255263
@@ -279,12 +287,12 @@ async def create(
279287 "/realtime/sessions" ,
280288 body = await async_maybe_transform (
281289 {
282- "model" : model ,
283290 "input_audio_format" : input_audio_format ,
284291 "input_audio_transcription" : input_audio_transcription ,
285292 "instructions" : instructions ,
286293 "max_response_output_tokens" : max_response_output_tokens ,
287294 "modalities" : modalities ,
295+ "model" : model ,
288296 "output_audio_format" : output_audio_format ,
289297 "temperature" : temperature ,
290298 "tool_choice" : tool_choice ,
0 commit comments