Skip to content

Commit d6303be

Browse files
authored
Add example on how to read and transcribe microphone input (#88)
1 parent 986ecf9 commit d6303be

2 files changed

Lines changed: 77 additions & 2 deletions

File tree

examples/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Examples
22

3-
This folder provides some examples of how the Speechmatics python client can be used to build different systems. The current examples include:
3+
This folder provides some examples of how the Speechmatics Python client can be used to build different systems.
4+
Each of the examples should have a separate README with all the necessary steps to get them up and running.
5+
The current examples include:
46

57
1. [notification_flow](./notification_flow/README.md) (webhooks)
68

@@ -18,4 +20,6 @@ Demonstrates how to run a websocket server that acts as a proxy to a speechmatic
1820

1921
Tools for batch synchronising a local folder of audio into a searchable database of transcriptions.
2022

21-
Each of the examples should have a separate README with all the necessary steps to get them up and running.
23+
5. [Microphone transcription example](./transcribe_from_microphone.py)
24+
25+
This shows how the `sounddevice` Python package can be used to stream audio from a microphone to Speechmatics.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import speechmatics
2+
import speechmatics.models
3+
import speechmatics.client
4+
import speechmatics.cli
5+
import asyncio
6+
import argparse
7+
import sys
8+
import sounddevice as sd
9+
10+
11+
class RawInputStreamWrapper:
    """Give an ``sd.RawInputStream`` a file-like ``read`` method returning bytes.

    The wrapped stream's ``read`` result is indexable; this adapter forwards
    only its first element, converted to an immutable ``bytes`` object, so the
    stream can be handed to code that expects ``read(n)`` to return raw bytes.
    """

    def __init__(self, wrapped: sd.RawInputStream):
        """Store the already-opened input stream to read from."""
        self.wrapped: sd.RawInputStream = wrapped

    def read(self, frames):
        """Read ``frames`` frames from the wrapped stream and return them as bytes.

        Note that the argument is forwarded as a frame count, not a byte
        count, so the returned object may be longer than ``frames`` bytes.
        """
        # Element [0] is the audio buffer; any further elements of the result
        # (presumably the overflow flag -- see the sounddevice docs) are dropped.
        return bytes(self.wrapped.read(frames)[0])
17+
18+
19+
async def transcribe_from_device(device, speechmatics_client, language: str, max_delay):
    """Stream audio from an input device to Speechmatics for transcription.

    Args:
        device: Input device passed straight to ``sd.RawInputStream``.
        speechmatics_client: Client whose ``run`` coroutine is awaited with
            the audio stream, transcription config and audio settings.
        language: Language code placed in the transcription config.
        max_delay: ``max_delay`` value placed in the transcription config.
    """
    # The same rate is used to open the device and to describe the audio to
    # the service, so the two can never disagree.
    frame_rate = 44_100
    with sd.RawInputStream(
        device=device, channels=1, samplerate=frame_rate, dtype="float32"
    ) as stream:
        settings = speechmatics.models.AudioSettings(
            sample_rate=frame_rate,
            # Samples are captured as float32, so the encoding name is
            # "pcm_f32le" or "pcm_f32be" according to the host byte order.
            encoding="pcm_f32" + ("le" if sys.byteorder == "little" else "be"),
        )

        conf = speechmatics.models.TranscriptionConfig(
            language=language,
            operating_point="enhanced",
            max_delay=max_delay,
            enable_partials=True,
            enable_entities=True,
        )
        # Must stay inside the ``with`` block: the stream is only open here.
        await speechmatics_client.run(RawInputStreamWrapper(stream), conf, settings)
37+
38+
39+
def main(args):
    """Create a Speechmatics client and transcribe microphone audio.

    Args:
        args: Parsed command-line arguments providing ``auth_token``,
            ``device``, ``language`` and ``max_delay``.
    """
    speechmatics_client = speechmatics.client.WebsocketClient(
        connection_settings_or_auth_token=args.auth_token
    )
    # Reuse the CLI's handlers so results are reported the same way as the
    # command-line tool (presumably printed as they arrive -- see
    # ``speechmatics.cli.add_printing_handlers``).
    transcripts = speechmatics.cli.Transcripts(text="", json=[])
    speechmatics.cli.add_printing_handlers(speechmatics_client, transcripts)

    # ``transcribe_from_device`` is a coroutine; drive it to completion here.
    asyncio.run(
        transcribe_from_device(
            args.device, speechmatics_client, args.language, args.max_delay
        )
    )
51+
52+
53+
def int_or_str(text):
    """Return ``text`` as an ``int`` when it parses as one, else unchanged.

    Used as an argparse ``type`` so a device can be given either as a numeric
    ID or as a name substring.
    """
    try:
        return int(text)
    except ValueError:
        # Not an integer literal: keep the original string as-is.
        return text
58+
59+
60+
if __name__ == "__main__":
    # Command-line entry point: parse the options and start transcribing.
    parser = argparse.ArgumentParser(
        description="Speechmatics Microphone Realtime Transcription example"
    )
    # No default is given, so ``args.device`` is None when the flag is omitted.
    parser.add_argument(
        "-d", "--device", type=int_or_str, help="input device (numeric ID or substring)"
    )
    parser.add_argument("-a", "--auth_token", type=str, required=True)
    parser.add_argument("-l", "--language", type=str, default="en")
    parser.add_argument("-m", "--max_delay", type=float, default=2.0)

    main(parser.parse_args())

0 commit comments

Comments
 (0)