Skip to content

Commit 497b01b

Browse files
committed Jul 4, 2020
added long speech recognition code to speech to text tutorial
1 parent 35e2998 commit 497b01b

File tree

4 files changed

+63
-1
lines changed

4 files changed

+63
-1
lines changed
 
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# importing libraries
2+
import speech_recognition as sr
3+
import os
4+
from pydub import AudioSegment
5+
from pydub.silence import split_on_silence
6+
7+
# create a speech recognition object
8+
# Module-level recognizer instance; get_large_audio_transcription() below
# uses it both to read each exported chunk and to run recognition on it.
r = sr.Recognizer()
9+
10+
# a function that splits the audio file into chunks
11+
# and applies speech recognition
12+
def get_large_audio_transcription(path, *, min_silence_len=500,
                                  silence_offset_db=14, keep_silence=500,
                                  folder_name="audio-chunks"):
    """Transcribe a long WAV file by splitting it on silence.

    The audio is cut into chunks wherever a sufficiently long silence is
    found, each chunk is exported to ``folder_name`` as ``chunk<i>.wav``
    and then passed to ``r.recognize_google``. Chunks whose speech cannot
    be understood are reported on stdout and skipped.

    Args:
        path: Path of the WAV file to transcribe.
        min_silence_len: Minimum silence length, in milliseconds, that
            triggers a split. Experiment with this for your audio.
        silence_offset_db: How many dB below the clip's average loudness
            (``sound.dBFS``) a section must be to count as silence.
        keep_silence: Milliseconds of silence kept around each chunk so
            words are not cut off abruptly.
        folder_name: Directory the chunk files are written to; created
            if it does not exist.

    Returns:
        The recognized text of all chunks, each capitalized and followed
        by ``". "``, concatenated in order. Empty string if nothing was
        recognized.

    Raises:
        Anything other than ``sr.UnknownValueError`` raised by the
        recognizer (e.g. a failed API request) is propagated unchanged.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio wherever silence lasts `min_silence_len` ms or more
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        # threshold is relative to this clip's own average loudness
        silence_thresh=sound.dBFS - silence_offset_db,
        keep_silence=keep_silence,
    )
    # create the directory for the audio chunks; exist_ok avoids the
    # check-then-create race of isdir() followed by mkdir()
    os.makedirs(folder_name, exist_ok=True)

    sentences = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the chunk so the recognizer can read it back as a file
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting the chunk to text
        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            # the exception message is usually empty, so name the chunk
            print("Error:", chunk_filename, ":",
                  str(e) or "speech was unintelligible")
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        sentences.append(text)
    # return the text for all chunks detected
    return "".join(sentences)
53+
54+
55+
if __name__ == '__main__':
    import sys

    # The script needs the path of the WAV file as its first argument;
    # exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} <path-to-wav-file>")
    path = sys.argv[1]
    print("\nFull text:", get_large_audio_transcription(path))
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
speech_recognition
1+
speech_recognition
2+
pyaudio
3+
pydub

0 commit comments

Comments
 (0)
Please sign in to comment.