Skip to content

Commit 6c79d93

Browse files
feat(stt): remove interimResults and lowLatency wss params
1 parent 8f19640 commit 6c79d93

File tree

5 files changed

+22
-28
lines changed

5 files changed

+22
-28
lines changed

lib/recognize-stream.ts

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* (C) Copyright IBM Corp. 2014, 2020.
2+
* (C) Copyright IBM Corp. 2014, 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -62,7 +62,7 @@ class RecognizeStream extends Duplex {
6262
*
6363
* Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted.
6464
*
65-
* By default, only finalized text is emitted in the data events, however when `objectMode`/`readableObjectMode` and `interim_results` are enabled, both interim and final results objects are emitted.
65+
* By default, only finalized text is emitted in the data events; however, when `objectMode`/`readableObjectMode` is enabled, both interim and final results objects are emitted.
6666
* WriteableElementStream uses this, for example, to live-update the DOM with word-by-word transcriptions.
6767
*
6868
* Note that the WebSocket connection is not established until the first chunk of data is received. This allows for auto-detection of content type (for wav/flac/opus audio).
@@ -86,7 +86,6 @@ class RecognizeStream extends Duplex {
8686
* @param {string} [options.contentType] - The format (MIME type) of the audio
8787
* @param {number} [options.customizationWeight] - Tell the service how much weight to give to words from the custom language model compared to those from the base model for the current request
8888
* @param {number} [options.inactivityTimeout] - The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is closed (default=30)
89-
* @param {boolean} [options.interimResults] - If true, the service returns interim results as a stream of JSON SpeechRecognitionResults objects (default=false)
9089
* @param {string[]} [options.keywords] - An array of keyword strings to spot in the audio
9190
* @param {number} [options.keywordsThreshold] - A confidence value that is the lower bound for spotting a keyword
9291
* @param {number} [options.maxAlternatives] - The maximum number of alternative transcripts that the service is to return (default=1)
@@ -105,7 +104,6 @@ class RecognizeStream extends Duplex {
105104
* @param {boolean} [options.splitTranscriptAtPhraseEnd] - If `true`, directs the service to split the transcript into multiple final results based on semantic features of the input
106105
* @param {number} [options.speechDetectorSensitivity] - The sensitivity of speech activity detection that the service is to perform
107106
* @param {number} [options.backgroundAudioSuppression] - The level to which the service is to suppress background audio based on its volume to prevent it from being transcribed as speech
108-
* @param {boolean} [params.lowLatency] - If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, directs the service to produce results even more quickly than it usually does
109107
* @constructor
110108
*/
111109
constructor(options: RecognizeStream.Options) {
@@ -168,7 +166,6 @@ class RecognizeStream extends Duplex {
168166
'timestamps',
169167
'word_confidence',
170168
'content-type',
171-
'interim_results',
172169
'keywords',
173170
'keywords_threshold',
174171
'max_alternatives',
@@ -182,7 +179,6 @@ class RecognizeStream extends Duplex {
182179
'split_transcript_at_phrase_end',
183180
'speech_detector_sensitivity',
184181
'background_audio_suppression',
185-
'low_latency',
186182
];
187183
const openingMessage = processUserParameters(options, openingMessageParamsAllowed);
188184
openingMessage.action = 'start';

package-lock.json

+17-17
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

speech-to-text/v1-generated.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -6602,13 +6602,13 @@ namespace SpeechToTextV1 {
66026602
* elements: the word followed by its start and end time in seconds, for example:
66036603
* `[["hello",0.0,1.2],["world",1.2,2.5]]`. Timestamps are returned only for the best alternative.
66046604
*/
6605-
timestamps?: string[];
6605+
timestamps?: [string, number, number][];
66066606
/** A confidence score for each word of the transcript as a list of lists. Each inner list consists of two
66076607
* elements: the word and its confidence score in the range of 0.0 to 1.0, for example:
66086608
* `[["hello",0.95],["world",0.86]]`. Confidence scores are returned only for the best alternative and only with
66096609
* results marked as final.
66106610
*/
6611-
word_confidence?: string[];
6611+
word_confidence?: [string, number][];
66126612
}
66136613

66146614
/** Component results for a speech recognition request. */

speech-to-text/v1.ts

-2
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@ namespace SpeechToTextV1 {
266266
contentType?: string;
267267
customizationWeight?: number;
268268
inactivityTimeout?: number;
269-
interimResults?: boolean;
270269
keywords?: string[];
271270
keywordsThreshold?: number;
272271
maxAlternatives?: number;
@@ -286,7 +285,6 @@ namespace SpeechToTextV1 {
286285
splitTranscriptAtPhraseEnd?: boolean;
287286
speechDetectorSensitivity?: number;
288287
backgroundAudioSuppression?: number;
289-
lowLatency?: boolean;
290288
characterInsertionBias?: number;
291289
}
292290
}

test/unit/speech-to-text.v1.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* (C) Copyright IBM Corp. 2024.
2+
* (C) Copyright IBM Corp. 2018, 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)