@@ -46,18 +46,19 @@ def with_streaming_response(self) -> SessionsWithStreamingResponse:
46
46
def create (
47
47
self ,
48
48
* ,
49
+ input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
50
+ input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
51
+ instructions : str | NotGiven = NOT_GIVEN ,
52
+ max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
53
+ modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
49
54
model : Literal [
50
55
"gpt-4o-realtime-preview" ,
51
56
"gpt-4o-realtime-preview-2024-10-01" ,
52
57
"gpt-4o-realtime-preview-2024-12-17" ,
53
58
"gpt-4o-mini-realtime-preview" ,
54
59
"gpt-4o-mini-realtime-preview-2024-12-17" ,
55
- ],
56
- input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
57
- input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
58
- instructions : str | NotGiven = NOT_GIVEN ,
59
- max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
60
- modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
60
+ ]
61
+ | NotGiven = NOT_GIVEN ,
61
62
output_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
62
63
temperature : float | NotGiven = NOT_GIVEN ,
63
64
tool_choice : str | NotGiven = NOT_GIVEN ,
@@ -81,9 +82,9 @@ def create(
81
82
the Realtime API.
82
83
83
84
Args:
84
- model: The Realtime model used for this session.
85
-
86
- input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
85
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
86
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
87
+ (mono), and little-endian byte order.
87
88
88
89
input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
89
90
`null` to turn off once on. Input audio transcription is not native to the
@@ -110,7 +111,10 @@ def create(
110
111
modalities: The set of modalities the model can respond with. To disable audio, set this to
111
112
["text"].
112
113
114
+ model: The Realtime model used for this session.
115
+
113
116
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
117
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
114
118
115
119
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
116
120
@@ -140,12 +144,12 @@ def create(
140
144
"/realtime/sessions" ,
141
145
body = maybe_transform (
142
146
{
143
- "model" : model ,
144
147
"input_audio_format" : input_audio_format ,
145
148
"input_audio_transcription" : input_audio_transcription ,
146
149
"instructions" : instructions ,
147
150
"max_response_output_tokens" : max_response_output_tokens ,
148
151
"modalities" : modalities ,
152
+ "model" : model ,
149
153
"output_audio_format" : output_audio_format ,
150
154
"temperature" : temperature ,
151
155
"tool_choice" : tool_choice ,
@@ -185,18 +189,19 @@ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
185
189
async def create (
186
190
self ,
187
191
* ,
192
+ input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
193
+ input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
194
+ instructions : str | NotGiven = NOT_GIVEN ,
195
+ max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
196
+ modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
188
197
model : Literal [
189
198
"gpt-4o-realtime-preview" ,
190
199
"gpt-4o-realtime-preview-2024-10-01" ,
191
200
"gpt-4o-realtime-preview-2024-12-17" ,
192
201
"gpt-4o-mini-realtime-preview" ,
193
202
"gpt-4o-mini-realtime-preview-2024-12-17" ,
194
- ],
195
- input_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
196
- input_audio_transcription : session_create_params .InputAudioTranscription | NotGiven = NOT_GIVEN ,
197
- instructions : str | NotGiven = NOT_GIVEN ,
198
- max_response_output_tokens : Union [int , Literal ["inf" ]] | NotGiven = NOT_GIVEN ,
199
- modalities : List [Literal ["text" , "audio" ]] | NotGiven = NOT_GIVEN ,
203
+ ]
204
+ | NotGiven = NOT_GIVEN ,
200
205
output_audio_format : Literal ["pcm16" , "g711_ulaw" , "g711_alaw" ] | NotGiven = NOT_GIVEN ,
201
206
temperature : float | NotGiven = NOT_GIVEN ,
202
207
tool_choice : str | NotGiven = NOT_GIVEN ,
@@ -220,9 +225,9 @@ async def create(
220
225
the Realtime API.
221
226
222
227
Args:
223
- model: The Realtime model used for this session.
224
-
225
- input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
228
+ input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
229
+ `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
230
+ (mono), and little-endian byte order.
226
231
227
232
input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
228
233
`null` to turn off once on. Input audio transcription is not native to the
@@ -249,7 +254,10 @@ async def create(
249
254
modalities: The set of modalities the model can respond with. To disable audio, set this to
250
255
["text"].
251
256
257
+ model: The Realtime model used for this session.
258
+
252
259
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
260
+ For `pcm16`, output audio is sampled at a rate of 24kHz.
253
261
254
262
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
255
263
@@ -279,12 +287,12 @@ async def create(
279
287
"/realtime/sessions" ,
280
288
body = await async_maybe_transform (
281
289
{
282
- "model" : model ,
283
290
"input_audio_format" : input_audio_format ,
284
291
"input_audio_transcription" : input_audio_transcription ,
285
292
"instructions" : instructions ,
286
293
"max_response_output_tokens" : max_response_output_tokens ,
287
294
"modalities" : modalities ,
295
+ "model" : model ,
288
296
"output_audio_format" : output_audio_format ,
289
297
"temperature" : temperature ,
290
298
"tool_choice" : tool_choice ,
0 commit comments