Skip to content

Commit 5487ef1

Browse files
committed
[whisper] Add OpenAI API compatibility
Apply PR comments Signed-off-by: Gwendal Roulleau <[email protected]>
1 parent e404735 commit 5487ef1

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

bundles/org.openhab.voice.whisperstt/src/main/java/org/openhab/voice/whisperstt/internal/WhisperSTTService.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,6 @@ public Set<AudioFormat> getSupportedFormats() {
286286
@Override
287287
public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
288288
throws STTException {
289-
290289
AtomicBoolean aborted = new AtomicBoolean(false);
291290
try {
292291
logger.debug("Creating VAD instance...");
@@ -607,7 +606,6 @@ private String recognizeLocal(int audioSamplesOffset, short[] audioSamples, Stri
607606
}
608607

609608
private String recognizeAPI(int audioSamplesOffset, short[] audioStream, String language) throws STTException {
610-
611609
// convert to byte array, Each short has 2 bytes
612610
int size = audioSamplesOffset * 2;
613611
ByteBuffer byteArrayBuffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN);
@@ -621,7 +619,7 @@ private String recognizeAPI(int audioSamplesOffset, short[] audioStream, String
621619

622620
try {
623621
AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(byteArray), jAudioFormat,
624-
size);
622+
audioSamplesOffset);
625623

626624
// write stream as a WAV file, in a byte array stream :
627625
ByteArrayInputStream byteArrayInputStream = null;

bundles/org.openhab.voice.whisperstt/src/main/resources/OH-INF/i18n/whisperstt.properties

+18-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33
addon.whisperstt.name = Whisper Speech-to-Text
44
addon.whisperstt.description = Whisper STT Service uses the whisper.cpp library to transcribe audio data to text.
55

6+
voice.config.whisperstt.apiKey.label = API Key
7+
voice.config.whisperstt.apiKey.description = Key to access the API
8+
voice.config.whisperstt.apiModelName.label = API Model
9+
voice.config.whisperstt.apiModelName.description = Model name to use (API only). Defaults to the only model available from OpenAI (whisper-1).
10+
voice.config.whisperstt.apiUrl.label = API URL
11+
voice.config.whisperstt.apiUrl.description = OpenAI-compatible API URL. Defaults to the OpenAI transcription service.
612
voice.config.whisperstt.audioContext.label = Audio Context
713
voice.config.whisperstt.audioContext.description = Overwrite the audio context size. (0 to use whisper default context size)
814
voice.config.whisperstt.beamSize.label = Beam Size
@@ -24,27 +30,35 @@ voice.config.whisperstt.greedyBestOf.description = Best Of configuration for sam
2430
voice.config.whisperstt.group.developer.label = Developer
2531
voice.config.whisperstt.group.developer.description = Options added for developers.
2632
voice.config.whisperstt.group.grammar.label = Grammar
27-
voice.config.whisperstt.group.grammar.description = Define a grammar to improve transcrptions.
33+
voice.config.whisperstt.group.grammar.description = Define a grammar to improve transcriptions.
2834
voice.config.whisperstt.group.messages.label = Info Messages
2935
voice.config.whisperstt.group.messages.description = Configure service information messages.
36+
voice.config.whisperstt.group.openaiapi.label = API Configuration Options
37+
voice.config.whisperstt.group.openaiapi.description = Configure an OpenAI-compatible API if you don't want to use the local model.
3038
voice.config.whisperstt.group.stt.label = STT Configuration
3139
voice.config.whisperstt.group.stt.description = Configure Speech to Text.
3240
voice.config.whisperstt.group.vad.label = Voice Activity Detection
33-
voice.config.whisperstt.group.vad.description = Configure the VAD mechanisim used to isolate single phrases to feed whisper with.
41+
voice.config.whisperstt.group.vad.description = Configure the VAD mechanism used to isolate single phrases to feed whisper with.
3442
voice.config.whisperstt.group.whisper.label = Whisper Options
3543
voice.config.whisperstt.group.whisper.description = Configure the whisper.cpp transcription options.
3644
voice.config.whisperstt.initSilenceSeconds.label = Initial Silence Seconds
3745
voice.config.whisperstt.initSilenceSeconds.description = Max initial seconds of silence to discard transcription.
3846
voice.config.whisperstt.initialPrompt.label = Initial Prompt
3947
voice.config.whisperstt.initialPrompt.description = Initial prompt to feed whisper with.
48+
voice.config.whisperstt.language.label = Language
49+
voice.config.whisperstt.language.description = If specified, speeds up recognition by avoiding auto-detection. Defaults to the system locale.
4050
voice.config.whisperstt.maxSeconds.label = Max Transcription Seconds
4151
voice.config.whisperstt.maxSeconds.description = Seconds to force transcription before silence detection.
4252
voice.config.whisperstt.maxSilenceSeconds.label = Max Silence Seconds
4353
voice.config.whisperstt.maxSilenceSeconds.description = Seconds of silence to trigger transcription.
4454
voice.config.whisperstt.minSeconds.label = Min Transcription Seconds
4555
voice.config.whisperstt.minSeconds.description = Min transcription seconds passed to whisper.
46-
voice.config.whisperstt.modelName.label = Model Name
47-
voice.config.whisperstt.modelName.description = Model name without extension.
56+
voice.config.whisperstt.mode.label = Local Mode Or API
57+
voice.config.whisperstt.mode.description = Use the local model or the OpenAI-compatible API.
58+
voice.config.whisperstt.mode.option.LOCAL = Local
59+
voice.config.whisperstt.mode.option.API = OpenAI API
60+
voice.config.whisperstt.modelName.label = Local Model Name
61+
voice.config.whisperstt.modelName.description = Model name without extension. Local mode only.
4862
voice.config.whisperstt.openvinoDevice.label = OpenVINO Device
4963
voice.config.whisperstt.openvinoDevice.description = Initialize OpenVINO encoder. (built-in binaries do not support OpenVINO, this has no effect)
5064
voice.config.whisperstt.preloadModel.label = Preload Model

0 commit comments

Comments
 (0)