Skip to content

Commit 58c5b03

Browse files
authored
Merge pull request tjardoo#45 from tjardoo/audio-stream
Audio speech stream support
2 parents caadca4 + 79689f9 commit 58c5b03

File tree

10 files changed

+159
-1
lines changed

10 files changed

+159
-1
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,9 @@ Learn how to turn audio into text or text into audio.
425425

426426
Generates audio from the input text.
427427

428+
> [!NOTE]
429+
> This endpoint also has `stream` support. See the [examples/audio/create_speech_stream](https://github.com/tjardoo/openai-client/tree/master/examples/audio/create_speech_stream) example.
430+
428431
```rust
429432
use openai_dive::v1::api::Client;
430433
use openai_dive::v1::resources::audio::{

examples/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ members = [
44
"audio/create_transcription",
55
"audio/create_translation",
66
"audio/create_speech",
7+
"audio/create_speech_stream",
78
"chat/create_chat_completion",
89
"chat/create_chat_completion_stream",
910
"chat/create_image_chat_completion",

examples/audio/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
[workspace]
22
resolver = "2"
3-
members = ["create_transcription", "create_translation", "create_speech"]
3+
members = [
4+
"create_transcription",
5+
"create_translation",
6+
"create_speech",
7+
"create_speech_stream",
8+
]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[package]
2+
name = "create_speech_stream"
3+
version = "0.1.0"
4+
edition = "2021"
5+
publish = false
6+
7+
[dependencies]
8+
openai_dive = { path = "./../../../../openai-client", features = ["stream"] }
9+
tokio = { version = "1.0", features = ["full"] }
10+
futures = "0.3"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!.gitignore
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use futures::future;
2+
use futures::stream::StreamExt;
3+
use openai_dive::v1::api::Client;
4+
use openai_dive::v1::resources::audio::{
5+
AudioSpeechParameters, AudioSpeechResponseFormat, AudioVoice,
6+
};
7+
use std::env;
8+
use std::fs::File;
9+
use std::io::Write;
10+
11+
#[tokio::main]
12+
async fn main() {
13+
let api_key = env::var("OPENAI_API_KEY").expect("$OPENAI_API_KEY is not set");
14+
15+
let client = Client::new(api_key);
16+
17+
let parameters = AudioSpeechParameters {
18+
model: "tts-1".to_string(),
19+
input: "The quick brown fox jumped over the lazy dog.".to_string(),
20+
voice: AudioVoice::Alloy,
21+
response_format: Some(AudioSpeechResponseFormat::Mp3),
22+
speed: Some(1.0),
23+
};
24+
25+
let mut file = File::create("./files/example-stream.mp3").unwrap();
26+
27+
let stream = client
28+
.audio()
29+
.create_speech_stream(parameters)
30+
.await
31+
.unwrap();
32+
33+
stream
34+
.for_each(|chunk| {
35+
match chunk {
36+
Ok(chunk) => {
37+
println!("Received chunk of {} bytes", chunk.bytes.len());
38+
39+
file.write(&chunk.bytes).unwrap();
40+
}
41+
Err(error) => println!("Steam error: {:?}", error),
42+
}
43+
44+
future::ready(())
45+
})
46+
.await;
47+
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@
418418
//!
419419
//! Generates audio from the input text.
420420
//!
421+
//! > [!NOTE]
422+
//! > This endpoint also has `stream` support. See the [examples/audio/create_speech_stream](https://github.com/tjardoo/openai-client/tree/master/examples/audio/create_speech_stream) example.
423+
//!
421424
//! ```rust
422425
//! use openai_dive::v1::api::Client;
423426
//! use openai_dive::v1::resources::audio::{

src/v1/api.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,30 @@ impl Client {
183183
Client::process_stream::<O>(event_source).await
184184
}
185185

186+
#[cfg(feature = "stream")]
/// Sends a JSON `POST` request to `path` and returns the response body as a
/// stream of raw byte chunks.
///
/// `parameters` is serialized as the JSON request body. Each item of the
/// returned stream is either a chunk of bytes or an `APIError::StreamError`
/// describing why reading the body failed.
///
/// # Errors
///
/// Returns `APIError::StreamError` if the request itself cannot be sent,
/// instead of panicking.
// NOTE(review): the HTTP status is not inspected here, so an error response
// body would be streamed to the caller as if it were payload bytes — confirm
// whether that is intended.
pub async fn post_stream_raw<I>(
    &self,
    path: &str,
    parameters: &I,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, APIError>> + Send>>, APIError>
where
    I: Serialize,
{
    let response = self
        .build_request(Method::POST, path)
        .json(&parameters)
        .send()
        .await
        .map_err(|error| APIError::StreamError(error.to_string()))?;

    let stream = response
        .bytes_stream()
        .map(|item| item.map_err(|error| APIError::StreamError(error.to_string())));

    // The return type drives the unsizing coercion to `dyn Stream`, so no
    // explicit cast is required.
    Ok(Box::pin(stream))
}
209+
186210
#[cfg(feature = "stream")]
187211
pub async fn process_stream<O>(
188212
mut event_soure: EventSource,

src/v1/endpoints/audio.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,15 @@ use crate::v1::error::APIError;
33
use crate::v1::helpers::file_from_disk_to_form_part;
44
use crate::v1::resources::audio::AudioSpeechParameters;
55
use crate::v1::resources::audio::AudioSpeechResponse;
6+
#[cfg(feature = "stream")]
7+
use crate::v1::resources::audio::AudioSpeechResponseChunkResponse;
68
use crate::v1::resources::audio::{AudioTranscriptionParameters, AudioTranslationParameters};
9+
#[cfg(feature = "stream")]
10+
use futures::Stream;
11+
#[cfg(feature = "stream")]
12+
use futures::StreamExt;
13+
#[cfg(feature = "stream")]
14+
use std::pin::Pin;
715

816
pub struct Audio<'a> {
917
pub client: &'a Client,
@@ -94,4 +102,35 @@ impl Audio<'_> {
94102

95103
Ok(response)
96104
}
105+
106+
#[cfg(feature = "stream")]
/// Generates audio from the input text and returns it as a stream of chunks.
///
/// The given `parameters` are copied into a `StreamAudioSpeechParameters`
/// with `stream` set to `true` and posted to `/audio/speech`. Every chunk of
/// raw bytes received is wrapped in an `AudioSpeechResponseChunkResponse`.
///
/// # Errors
///
/// Returns the `APIError` produced by `Client::post_stream_raw` when the
/// request cannot be started. Errors that occur while reading the body are
/// yielded as `Err` items of the returned stream.
pub async fn create_speech_stream(
    &self,
    parameters: AudioSpeechParameters,
) -> Result<
    Pin<Box<dyn Stream<Item = Result<AudioSpeechResponseChunkResponse, APIError>> + Send>>,
    APIError,
> {
    use crate::v1::resources::audio::StreamAudioSpeechParameters;

    let stream_parameters = StreamAudioSpeechParameters {
        model: parameters.model,
        input: parameters.input,
        voice: parameters.voice,
        response_format: parameters.response_format,
        speed: parameters.speed,
        stream: true,
    };

    // Propagate a failed request to the caller rather than panicking.
    let stream = self
        .client
        .post_stream_raw("/audio/speech", &stream_parameters)
        .await?
        .map(|item| item.map(|bytes| AudioSpeechResponseChunkResponse { bytes }));

    Ok(Box::pin(stream))
}
97136
}

src/v1/resources/audio.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,30 @@ pub struct AudioSpeechResponse {
6565
pub bytes: Bytes,
6666
}
6767

68+
#[cfg(feature = "stream")]
69+
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
70+
/// Request parameters for the streaming variant of the speech endpoint.
///
/// Mirrors the fields copied from `AudioSpeechParameters` in
/// `create_speech_stream`, plus the `stream` flag.
pub struct StreamAudioSpeechParameters {
71+
/// One of the available TTS models: tts-1 or tts-1-hd.
72+
pub model: String,
73+
/// The text to generate audio for. The maximum length is 4096 characters.
74+
pub input: String,
75+
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
76+
pub voice: AudioVoice,
77+
/// The format of the generated audio. Supported formats are mp3, opus, aac, and flac.
/// Omitted from the serialized request when `None`.
78+
#[serde(skip_serializing_if = "Option::is_none")]
79+
pub response_format: Option<AudioSpeechResponseFormat>,
80+
/// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.
/// Omitted from the serialized request when `None`.
81+
#[serde(skip_serializing_if = "Option::is_none")]
82+
pub speed: Option<f32>,
83+
/// Streaming flag sent with the request; `create_speech_stream` always sets it to `true`.
pub stream: bool,
84+
}
85+
86+
#[cfg(feature = "stream")]
87+
#[derive(Debug, Clone, PartialEq)]
88+
/// One item of the stream returned by `create_speech_stream`.
pub struct AudioSpeechResponseChunkResponse {
89+
/// The raw bytes received for this chunk of the response body.
pub bytes: Bytes,
90+
}
91+
6892
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
6993
#[serde(rename_all = "lowercase")]
7094
pub enum AudioOutputFormat {

0 commit comments

Comments
 (0)