Skip to content

Commit 0acd0da

Browse files
feat(api): add new realtime and audio models, realtime session options
1 parent cca0970 commit 0acd0da

File tree

10 files changed

+277
-10
lines changed

10 files changed

+277
-10
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 111
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-2bcc845d8635bf93ddcf9ee723af4d7928248412a417bee5fc10d863a1e13867.yml
3-
openapi_spec_hash: 865230cb3abeb01bd85de05891af23c4
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-04213ea42074f52b8e7e60e101ed7d7ae47b8abcc233c7e8eae310bba544454d.yml
3+
openapi_spec_hash: 5fb148608764103ba3700cd6bda4f22e
44
config_hash: ed1e6b3c5f93d12b80d31167f55c557c

src/openai/resources/beta/realtime/sessions.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,17 @@ def create(
5454
"gpt-4o-realtime-preview",
5555
"gpt-4o-realtime-preview-2024-10-01",
5656
"gpt-4o-realtime-preview-2024-12-17",
57+
"gpt-4o-realtime-preview-2025-06-03",
5758
"gpt-4o-mini-realtime-preview",
5859
"gpt-4o-mini-realtime-preview-2024-12-17",
5960
]
6061
| NotGiven = NOT_GIVEN,
6162
output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
63+
speed: float | NotGiven = NOT_GIVEN,
6264
temperature: float | NotGiven = NOT_GIVEN,
6365
tool_choice: str | NotGiven = NOT_GIVEN,
6466
tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
67+
tracing: session_create_params.Tracing | NotGiven = NOT_GIVEN,
6568
turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
6669
voice: Union[
6770
str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
@@ -129,6 +132,10 @@ def create(
129132
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
130133
For `pcm16`, output audio is sampled at a rate of 24kHz.
131134
135+
speed: The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
136+
minimum speed. 1.5 is the maximum speed. This value can only be changed in
137+
between model turns, not while a response is in progress.
138+
132139
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
133140
temperature of 0.8 is highly recommended for best performance.
134141
@@ -137,6 +144,12 @@ def create(
137144
138145
tools: Tools (functions) available to the model.
139146
147+
tracing: Configuration options for tracing. Set to null to disable tracing. Once tracing
148+
is enabled for a session, the configuration cannot be modified.
149+
150+
`auto` will create a trace for the session with default values for the workflow
151+
name, group id, and metadata.
152+
140153
turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
141154
set to `null` to turn off, in which case the client must manually trigger model
142155
response. Server VAD means that the model will detect the start and end of
@@ -175,9 +188,11 @@ def create(
175188
"modalities": modalities,
176189
"model": model,
177190
"output_audio_format": output_audio_format,
191+
"speed": speed,
178192
"temperature": temperature,
179193
"tool_choice": tool_choice,
180194
"tools": tools,
195+
"tracing": tracing,
181196
"turn_detection": turn_detection,
182197
"voice": voice,
183198
},
@@ -224,14 +239,17 @@ async def create(
224239
"gpt-4o-realtime-preview",
225240
"gpt-4o-realtime-preview-2024-10-01",
226241
"gpt-4o-realtime-preview-2024-12-17",
242+
"gpt-4o-realtime-preview-2025-06-03",
227243
"gpt-4o-mini-realtime-preview",
228244
"gpt-4o-mini-realtime-preview-2024-12-17",
229245
]
230246
| NotGiven = NOT_GIVEN,
231247
output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
248+
speed: float | NotGiven = NOT_GIVEN,
232249
temperature: float | NotGiven = NOT_GIVEN,
233250
tool_choice: str | NotGiven = NOT_GIVEN,
234251
tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
252+
tracing: session_create_params.Tracing | NotGiven = NOT_GIVEN,
235253
turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
236254
voice: Union[
237255
str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
@@ -299,6 +317,10 @@ async def create(
299317
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
300318
For `pcm16`, output audio is sampled at a rate of 24kHz.
301319
320+
speed: The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
321+
minimum speed. 1.5 is the maximum speed. This value can only be changed in
322+
between model turns, not while a response is in progress.
323+
302324
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
303325
temperature of 0.8 is highly recommended for best performance.
304326
@@ -307,6 +329,12 @@ async def create(
307329
308330
tools: Tools (functions) available to the model.
309331
332+
tracing: Configuration options for tracing. Set to null to disable tracing. Once tracing
333+
is enabled for a session, the configuration cannot be modified.
334+
335+
`auto` will create a trace for the session with default values for the workflow
336+
name, group id, and metadata.
337+
310338
turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
311339
set to `null` to turn off, in which case the client must manually trigger model
312340
response. Server VAD means that the model will detect the start and end of
@@ -345,9 +373,11 @@ async def create(
345373
"modalities": modalities,
346374
"model": model,
347375
"output_audio_format": output_audio_format,
376+
"speed": speed,
348377
"temperature": temperature,
349378
"tool_choice": tool_choice,
350379
"tools": tools,
380+
"tracing": tracing,
351381
"turn_detection": turn_detection,
352382
"voice": voice,
353383
},

src/openai/types/beta/realtime/session.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

33
from typing import List, Union, Optional
4-
from typing_extensions import Literal
4+
from typing_extensions import Literal, TypeAlias
55

66
from ...._models import BaseModel
77

8-
__all__ = ["Session", "InputAudioNoiseReduction", "InputAudioTranscription", "Tool", "TurnDetection"]
8+
__all__ = [
9+
"Session",
10+
"InputAudioNoiseReduction",
11+
"InputAudioTranscription",
12+
"Tool",
13+
"Tracing",
14+
"TracingUnionMember1",
15+
"TurnDetection",
16+
]
917

1018

1119
class InputAudioNoiseReduction(BaseModel):
@@ -59,6 +67,29 @@ class Tool(BaseModel):
5967
"""The type of the tool, i.e. `function`."""
6068

6169

70+
class TracingUnionMember1(BaseModel):
71+
group_id: Optional[str] = None
72+
"""
73+
The group id to attach to this trace to enable filtering and grouping in the
74+
traces dashboard.
75+
"""
76+
77+
metadata: Optional[object] = None
78+
"""
79+
The arbitrary metadata to attach to this trace to enable filtering in the traces
80+
dashboard.
81+
"""
82+
83+
workflow_name: Optional[str] = None
84+
"""The name of the workflow to attach to this trace.
85+
86+
This is used to name the trace in the traces dashboard.
87+
"""
88+
89+
90+
Tracing: TypeAlias = Union[Literal["auto"], TracingUnionMember1]
91+
92+
6293
class TurnDetection(BaseModel):
6394
create_response: Optional[bool] = None
6495
"""
@@ -175,6 +206,7 @@ class Session(BaseModel):
175206
"gpt-4o-realtime-preview",
176207
"gpt-4o-realtime-preview-2024-10-01",
177208
"gpt-4o-realtime-preview-2024-12-17",
209+
"gpt-4o-realtime-preview-2025-06-03",
178210
"gpt-4o-mini-realtime-preview",
179211
"gpt-4o-mini-realtime-preview-2024-12-17",
180212
]
@@ -188,6 +220,14 @@ class Session(BaseModel):
188220
sampled at a rate of 24kHz.
189221
"""
190222

223+
speed: Optional[float] = None
224+
"""The speed of the model's spoken response.
225+
226+
1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
227+
This value can only be changed in between model turns, not while a response is
228+
in progress.
229+
"""
230+
191231
temperature: Optional[float] = None
192232
"""Sampling temperature for the model, limited to [0.6, 1.2].
193233
@@ -204,6 +244,16 @@ class Session(BaseModel):
204244
tools: Optional[List[Tool]] = None
205245
"""Tools (functions) available to the model."""
206246

247+
tracing: Optional[Tracing] = None
248+
"""Configuration options for tracing.
249+
250+
Set to null to disable tracing. Once tracing is enabled for a session, the
251+
configuration cannot be modified.
252+
253+
`auto` will create a trace for the session with default values for the workflow
254+
name, group id, and metadata.
255+
"""
256+
207257
turn_detection: Optional[TurnDetection] = None
208258
"""Configuration for turn detection, either Server VAD or Semantic VAD.
209259
@@ -227,5 +277,5 @@ class Session(BaseModel):
227277
228278
Voice cannot be changed during the session once the model has responded with
229279
audio at least once. Current voice options are `alloy`, `ash`, `ballad`,
230-
`coral`, `echo` `sage`, `shimmer` and `verse`.
280+
`coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`.
231281
"""

src/openai/types/beta/realtime/session_create_params.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from __future__ import annotations
44

55
from typing import List, Union, Iterable
6-
from typing_extensions import Literal, TypedDict
6+
from typing_extensions import Literal, TypeAlias, TypedDict
77

88
__all__ = [
99
"SessionCreateParams",
@@ -12,6 +12,8 @@
1212
"InputAudioNoiseReduction",
1313
"InputAudioTranscription",
1414
"Tool",
15+
"Tracing",
16+
"TracingUnionMember1",
1517
"TurnDetection",
1618
]
1719

@@ -82,6 +84,7 @@ class SessionCreateParams(TypedDict, total=False):
8284
"gpt-4o-realtime-preview",
8385
"gpt-4o-realtime-preview-2024-10-01",
8486
"gpt-4o-realtime-preview-2024-12-17",
87+
"gpt-4o-realtime-preview-2025-06-03",
8588
"gpt-4o-mini-realtime-preview",
8689
"gpt-4o-mini-realtime-preview-2024-12-17",
8790
]
@@ -94,6 +97,14 @@ class SessionCreateParams(TypedDict, total=False):
9497
sampled at a rate of 24kHz.
9598
"""
9699

100+
speed: float
101+
"""The speed of the model's spoken response.
102+
103+
1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
104+
This value can only be changed in between model turns, not while a response is
105+
in progress.
106+
"""
107+
97108
temperature: float
98109
"""Sampling temperature for the model, limited to [0.6, 1.2].
99110
@@ -110,6 +121,16 @@ class SessionCreateParams(TypedDict, total=False):
110121
tools: Iterable[Tool]
111122
"""Tools (functions) available to the model."""
112123

124+
tracing: Tracing
125+
"""Configuration options for tracing.
126+
127+
Set to null to disable tracing. Once tracing is enabled for a session, the
128+
configuration cannot be modified.
129+
130+
`auto` will create a trace for the session with default values for the workflow
131+
name, group id, and metadata.
132+
"""
133+
113134
turn_detection: TurnDetection
114135
"""Configuration for turn detection, either Server VAD or Semantic VAD.
115136
@@ -205,6 +226,29 @@ class Tool(TypedDict, total=False):
205226
"""The type of the tool, i.e. `function`."""
206227

207228

229+
class TracingUnionMember1(TypedDict, total=False):
230+
group_id: str
231+
"""
232+
The group id to attach to this trace to enable filtering and grouping in the
233+
traces dashboard.
234+
"""
235+
236+
metadata: object
237+
"""
238+
The arbitrary metadata to attach to this trace to enable filtering in the traces
239+
dashboard.
240+
"""
241+
242+
workflow_name: str
243+
"""The name of the workflow to attach to this trace.
244+
245+
This is used to name the trace in the traces dashboard.
246+
"""
247+
248+
249+
Tracing: TypeAlias = Union[Literal["auto"], TracingUnionMember1]
250+
251+
208252
class TurnDetection(TypedDict, total=False):
209253
create_response: bool
210254
"""

src/openai/types/beta/realtime/session_create_response.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

33
from typing import List, Union, Optional
4-
from typing_extensions import Literal
4+
from typing_extensions import Literal, TypeAlias
55

66
from ...._models import BaseModel
77

8-
__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"]
8+
__all__ = [
9+
"SessionCreateResponse",
10+
"ClientSecret",
11+
"InputAudioTranscription",
12+
"Tool",
13+
"Tracing",
14+
"TracingUnionMember1",
15+
"TurnDetection",
16+
]
917

1018

1119
class ClientSecret(BaseModel):
@@ -48,6 +56,29 @@ class Tool(BaseModel):
4856
"""The type of the tool, i.e. `function`."""
4957

5058

59+
class TracingUnionMember1(BaseModel):
60+
group_id: Optional[str] = None
61+
"""
62+
The group id to attach to this trace to enable filtering and grouping in the
63+
traces dashboard.
64+
"""
65+
66+
metadata: Optional[object] = None
67+
"""
68+
The arbitrary metadata to attach to this trace to enable filtering in the traces
69+
dashboard.
70+
"""
71+
72+
workflow_name: Optional[str] = None
73+
"""The name of the workflow to attach to this trace.
74+
75+
This is used to name the trace in the traces dashboard.
76+
"""
77+
78+
79+
Tracing: TypeAlias = Union[Literal["auto"], TracingUnionMember1]
80+
81+
5182
class TurnDetection(BaseModel):
5283
prefix_padding_ms: Optional[int] = None
5384
"""Amount of audio to include before the VAD detected speech (in milliseconds).
@@ -121,6 +152,14 @@ class SessionCreateResponse(BaseModel):
121152
output_audio_format: Optional[str] = None
122153
"""The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
123154

155+
speed: Optional[float] = None
156+
"""The speed of the model's spoken response.
157+
158+
1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
159+
This value can only be changed in between model turns, not while a response is
160+
in progress.
161+
"""
162+
124163
temperature: Optional[float] = None
125164
"""Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
126165

@@ -133,6 +172,16 @@ class SessionCreateResponse(BaseModel):
133172
tools: Optional[List[Tool]] = None
134173
"""Tools (functions) available to the model."""
135174

175+
tracing: Optional[Tracing] = None
176+
"""Configuration options for tracing.
177+
178+
Set to null to disable tracing. Once tracing is enabled for a session, the
179+
configuration cannot be modified.
180+
181+
`auto` will create a trace for the session with default values for the workflow
182+
name, group id, and metadata.
183+
"""
184+
136185
turn_detection: Optional[TurnDetection] = None
137186
"""Configuration for turn detection.
138187

0 commit comments

Comments
 (0)