recognizer.py
import sys

import speech_recognition as sr


def main():
    """Transcribe the audio file named on the command line and print the text.

    Usage: python recognizer.py <audio-file>

    Exits with an error message (non-zero status) when the argument is
    missing or when recognition fails.
    """
    # read filename from arguments
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <audio-file>")
    filename = sys.argv[1]

    # initialize the recognizer
    r = sr.Recognizer()

    # open the file
    with sr.AudioFile(filename) as source:
        # listen for the data (load audio to memory)
        audio_data = r.record(source)

    # recognize (convert from speech to text); the audio is already in
    # memory, so the file does not need to stay open for the request
    try:
        text = r.recognize_google(audio_data)
    except sr.UnknownValueError:
        sys.exit("Error: could not understand the audio")
    except sr.RequestError as e:
        sys.exit(f"Error: speech recognition request failed: {e}")
    print(text)


if __name__ == "__main__":
    main()

# live_recognizer.py
import sys

import speech_recognition as sr


def main():
    """Record from the default microphone and print the recognized text.

    Usage: python live_recognizer.py <duration-in-seconds>

    Exits with an error message (non-zero status) when the duration is
    missing, is not a positive integer, or when recognition fails.
    """
    # read duration from the arguments
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <duration-in-seconds>")
    try:
        duration = int(sys.argv[1])
    except ValueError:
        sys.exit(f"Error: duration must be an integer, got {sys.argv[1]!r}")
    if duration <= 0:
        # a timed recording needs a positive length
        sys.exit("Error: duration must be a positive number of seconds")

    # initialize the recognizer
    r = sr.Recognizer()

    print("Please talk")
    with sr.Microphone() as source:
        # read the audio data from the default microphone
        audio_data = r.record(source, duration=duration)

    # the microphone is released before the (slower) recognition request
    print("Recognizing...")
    # convert speech to text
    try:
        text = r.recognize_google(audio_data)
    except sr.UnknownValueError:
        sys.exit("Error: could not understand the audio")
    except sr.RequestError as e:
        sys.exit(f"Error: speech recognition request failed: {e}")
    print(text)


if __name__ == "__main__":
    main()

# long_audio_recognizer.py
# importing libraries
import os
import sys

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
r = sr.Recognizer()


# a function to recognize speech in the audio file
# so that we don't repeat ourselves in other functions
def transcribe_audio(path):
    """Return the text recognized in the audio file at `path`.

    Errors raised by `r.recognize_google` (for example
    `sr.UnknownValueError`) propagate to the caller.
    """
    # use the audio file as the audio source
    with sr.AudioFile(path) as source:
        audio_listened = r.record(source)
    # the audio is in memory now, so convert it to text after closing the file
    return r.recognize_google(audio_listened)


def _transcribe_chunks(chunks, folder_name):
    """Export each chunk as a WAV file in `folder_name` and transcribe it.

    Chunks are written as `chunk1.wav`, `chunk2.wav`, ... Each recognized
    chunk is capitalized, suffixed with ". ", printed, and appended to the
    result. A chunk that raises `sr.UnknownValueError` is reported and
    skipped. Returns the concatenated text of all recognized chunks.
    """
    # create a directory to store the audio chunks (no-op if it exists)
    os.makedirs(folder_name, exist_ok=True)
    pieces = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            pieces.append(text)
    # return the text for all chunks detected
    return "".join(pieces)


# a function that splits the audio file into chunks on silence
# and applies speech recognition
def get_large_audio_transcription_on_silence(path):
    """Split a large audio file into chunks on silence and transcribe each.

    The chunks are exported to the "audio-chunks" directory. Returns the
    concatenated text of every chunk that could be recognized.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the sound where silence is 500 milliseconds or more and get chunks
    chunks = split_on_silence(
        sound,
        # experiment with this value for your target audio file
        min_silence_len=500,
        # adjust this per requirement
        silence_thresh=sound.dBFS - 14,
        # keep 500 ms of silence at the chunk edges, adjustable as well
        keep_silence=500,
    )
    return _transcribe_chunks(chunks, "audio-chunks")


# a function that splits the audio file into fixed interval chunks
# and applies speech recognition
def get_large_audio_transcription_fixed_interval(path, minutes=5):
    """Split a large audio file into fixed-length chunks and transcribe each.

    `minutes` is the length of each chunk (fractions are allowed). The
    chunks are exported to the "audio-fixed-chunks" directory. Returns the
    concatenated text of every chunk that could be recognized.

    Raises ValueError when `minutes` yields a chunk shorter than 1 ms.
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        # a zero step would make range() fail below with an unclear message
        raise ValueError(
            f"minutes must give a chunk of at least 1 ms, got {minutes!r}"
        )
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the audio file into chunks
    chunks = [
        sound[i:i + chunk_length_ms]
        for i in range(0, len(sound), chunk_length_ms)
    ]
    return _transcribe_chunks(chunks, "audio-fixed-chunks")


if __name__ == '__main__':
    # example inputs: "30-4447-0004.wav", "7601-291468-0006.wav"
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <audio-file>")
    path = sys.argv[1]
    print("\nFull text:", get_large_audio_transcription_on_silence(path))
    print("="*50)
    # minutes=1/6 gives 10-second chunks
    print("\nFull text:", get_large_audio_transcription_fixed_interval(path, minutes=1/6))
