1
+ # importing libraries
2
+ import speech_recognition as sr
3
+ import os
4
+ from pydub import AudioSegment
5
+ from pydub .silence import split_on_silence
6
+
7
# Module-level speech recognizer; the transcription function in this file uses
# it both to read audio files and to run recognition.
r = sr.Recognizer()
9
+
10
+ # a function that splits the audio file into chunks
11
+ # and applies speech recognition
12
def get_large_audio_transcription(
    path,
    *,
    min_silence_len=500,
    silence_offset_db=14,
    keep_silence=500,
    folder_name="audio-chunks",
):
    """Transcribe a WAV file by splitting it on silence and recognizing each chunk.

    The audio is loaded with pydub, split wherever a sufficiently long and
    quiet stretch is found, and every resulting chunk is exported as a WAV
    file into ``folder_name`` and sent to the recognizer individually.

    Args:
        path: Path of the WAV file to transcribe.
        min_silence_len: Value passed to ``split_on_silence`` as
            ``min_silence_len`` (milliseconds of silence required to split).
            Tune this for the target recording.
        silence_offset_db: Number of dB subtracted from the recording's
            average loudness (``sound.dBFS``) to obtain the silence threshold.
        keep_silence: Value passed to ``split_on_silence`` as
            ``keep_silence`` (milliseconds of silence kept around each chunk).
        folder_name: Directory in which the chunk files are written. It is
            created (including missing parents) when it does not exist.

    Returns:
        The concatenation of the recognized text of every chunk, each one
        capitalized and followed by ``" . "``. Chunks the recognizer could
        not understand contribute nothing. An empty string is returned when
        no chunk yields text.

    Side effects:
        Writes one WAV file per chunk into ``folder_name`` (the files are
        left in place) and prints progress / error lines to stdout.

    Raises:
        Only ``sr.UnknownValueError`` is handled here; any other exception
        raised while loading, exporting or recognizing audio propagates to
        the caller.
    """
    # Load the whole recording into memory with pydub.
    sound = AudioSegment.from_wav(path)

    # Split wherever the signal stays below the threshold for at least
    # `min_silence_len` ms. The threshold is relative to the recording's own
    # average loudness so that quiet and loud recordings behave alike.
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        silence_thresh=sound.dBFS - silence_offset_db,
        keep_silence=keep_silence,
    )

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    transcripts = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # The recognizer reads from a file, so each chunk is exported first.
        # NOTE(review): the file name contains a space before ".wav" --
        # confirm this is intended.
        chunk_filename = os.path.join(folder_name, f"chunk{i} .wav")
        audio_chunk.export(chunk_filename, format="wav")

        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                # Unintelligible chunk: report it and carry on with the rest.
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()} . "
                print(chunk_filename, ":", text)
                transcripts.append(text)

    # Join once at the end instead of growing a string inside the loop.
    return "".join(transcripts)
53
+
54
+
55
if __name__ == '__main__':
    import sys

    # The script needs the path of the WAV file as its first argument; exit
    # with a usage message instead of an IndexError traceback when missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <path-to-wav-file>")

    path = sys.argv[1]
    print("\n Full text:", get_large_audio_transcription(path))
0 commit comments