Skip to content

Commit 7dc4cc2

Browse files
committed
Add support for additional voice instructions and stream formats in speech settings
1 parent 10dc1c8 commit 7dc4cc2

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

openai-client/src/main/scala/io/cequence/openaiscala/service/impl/OpenAIServiceImpl.scala

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -376,8 +376,10 @@ private[service] trait OpenAIServiceImpl
376376
Param.input -> Some(input),
377377
Param.model -> Some(settings.model),
378378
Param.voice -> Some(settings.voice.toString),
379+
Param.instructions -> settings.instructions,
379380
Param.speed -> settings.speed,
380-
Param.response_format -> settings.response_format.map(_.toString)
381+
Param.response_format -> settings.response_format.map(_.toString),
382+
Param.stream_format -> settings.stream_format.map(_.toString)
381383
)
382384
).map(_.source)
383385

openai-core/src/main/scala/io/cequence/openaiscala/domain/settings/CreateSpeechSettings.scala

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,37 +3,59 @@ package io.cequence.openaiscala.domain.settings
33
import io.cequence.wsclient.domain.EnumValue
44

55
case class CreateSpeechSettings(
6-
// One of the available TTS models: tts-1 or tts-1-hd
6+
// One of the available TTS models: tts-1, tts-1-hd or gpt-4o-mini-tts.
77
model: String,
88

9-
// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
9+
// The voice to use when generating the audio. Supported voices are alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse.
1010
voice: VoiceType,
1111

12-
// The format to audio in. Supported formats are mp3, opus, aac, and flac.
12+
// Control the voice of your generated audio with additional instructions. Does not work with tts-1 or tts-1-hd.
13+
instructions: Option[String] = None,
14+
15+
// The format of the generated audio. Supported formats are mp3, opus, aac, flac, wav, and pcm.
1316
// Defaults to mp3.
1417
response_format: Option[SpeechResponseFormatType] = None,
1518

1619
// The speed of the generated audio. Select a value from 0.25 to 4.0.
1720
// Defaults to 1.0.
18-
speed: Option[Double] = None
21+
speed: Option[Double] = None,
22+
23+
// The format to stream the audio in. Supported formats are sse and audio.
24+
// sse is not supported for tts-1 or tts-1-hd.
25+
// Defaults to audio.
26+
stream_format: Option[StreamFormatType] = None
1927
)
2028

29+
sealed trait StreamFormatType extends EnumValue
30+
31+
object StreamFormatType {
32+
case object sse extends StreamFormatType
33+
case object audio extends StreamFormatType
34+
}
35+
2136
sealed trait SpeechResponseFormatType extends EnumValue
2237

2338
object SpeechResponseFormatType {
2439
case object mp3 extends SpeechResponseFormatType
2540
case object opus extends SpeechResponseFormatType
2641
case object aac extends SpeechResponseFormatType
2742
case object flac extends SpeechResponseFormatType
43+
case object wav extends SpeechResponseFormatType
44+
case object pcm extends SpeechResponseFormatType
2845
}
2946

3047
sealed trait VoiceType extends EnumValue
3148

3249
object VoiceType {
3350
case object alloy extends VoiceType
51+
case object ash extends VoiceType
52+
case object ballad extends VoiceType
53+
case object coral extends VoiceType
3454
case object echo extends VoiceType
3555
case object fable extends VoiceType
3656
case object onyx extends VoiceType
3757
case object nova extends VoiceType
58+
case object sage extends VoiceType
3859
case object shimmer extends VoiceType
60+
case object verse extends VoiceType
3961
}

0 commit comments

Comments
 (0)