Code for How to Convert Speech to Text in Python Tutorial

recognizer.py

import sys

import speech_recognition as sr

# read filename from arguments; exit with a usage hint rather than an
# IndexError traceback when it is missing
if len(sys.argv) < 2:
    sys.exit("Usage: python recognizer.py <audio-file>")
filename = sys.argv[1]

# initialize the recognizer
r = sr.Recognizer()

# open the file
with sr.AudioFile(filename) as source:
    # listen for the data (load audio to memory)
    audio_data = r.record(source)

# recognize (convert from speech to text); the audio is already in memory,
# so the file does not need to stay open during the request
text = r.recognize_google(audio_data)
print(text)

live_recognizer.py

import sys

import speech_recognition as sr

# read duration (in seconds) from the arguments; exit with a usage hint
# rather than an IndexError traceback when it is missing
if len(sys.argv) < 2:
    sys.exit("Usage: python live_recognizer.py <duration-in-seconds>")
duration = int(sys.argv[1])

# initialize the recognizer
r = sr.Recognizer()

print("Please talk")
with sr.Microphone() as source:
    # read the audio data from the default microphone
    audio_data = r.record(source, duration=duration)

# the recording is already in memory, so the microphone is released
# before waiting on the recognition request
print("Recognizing...")
# convert speech to text
text = r.recognize_google(audio_data)
print(text)

long_audio_recognizer.py

# importing libraries
import os
import sys

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
r = sr.Recognizer()


def transcribe_audio(path):
    """Transcribe a single audio file with the module-level recognizer.

    Defined once so that the chunking functions below do not repeat the
    load-and-recognize steps.

    Args:
        path: Path of the audio file to open with ``sr.AudioFile``.

    Returns:
        The text returned by ``r.recognize_google``.

    Raises:
        sr.UnknownValueError: Propagated from the recognizer; callers in this
            file catch it per chunk. Any other exception propagates as well.
    """
    # use the audio file as the audio source
    with sr.AudioFile(path) as source:
        audio_listened = r.record(source)
    # convert it to text (the audio is already loaded in memory)
    return r.recognize_google(audio_listened)


def _transcribe_chunks(chunks, folder_name):
    """Export each chunk to ``folder_name`` as WAV, transcribe it, and join the text.

    Args:
        chunks: Iterable of pydub audio segments.
        folder_name: Directory that receives ``chunk1.wav``, ``chunk2.wav``, ...
            It is created if it does not exist.

    Returns:
        The concatenation of every recognized chunk, each capitalized and
        followed by ". ". Chunks that raise ``sr.UnknownValueError`` are
        reported on stdout and skipped.
    """
    # create a directory to store the audio chunks
    os.makedirs(folder_name, exist_ok=True)
    sentences = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            sentences.append(text)
    # return the text for all chunks detected
    return "".join(sentences)


def get_large_audio_transcription_on_silence(path):
    """Split a large audio file into chunks on silence and transcribe each chunk.

    Args:
        path: Path of the audio file to open with pydub.

    Returns:
        The transcription of all recognized chunks as one string.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split audio sound where silence is 500 milliseconds or more and get chunks
    chunks = split_on_silence(
        sound,
        # experiment with this value for your target audio file
        min_silence_len=500,
        # 14 dB below the clip's own dBFS level; adjust this per requirement
        silence_thresh=sound.dBFS - 14,
        # keep 500 ms of the surrounding silence on each chunk, adjustable as well
        keep_silence=500,
    )
    return _transcribe_chunks(chunks, "audio-chunks")


def get_large_audio_transcription_fixed_interval(path, minutes=5):
    """Split a large audio file into fixed-length chunks and transcribe each chunk.

    Args:
        path: Path of the audio file to open with pydub.
        minutes: Length of each chunk in minutes; fractions are allowed
            (e.g. ``1/6`` for ten seconds).

    Returns:
        The transcription of all recognized chunks as one string.

    Raises:
        ValueError: If ``minutes`` converts to a chunk length below 1 ms.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the audio file into chunks
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        # a zero step makes range() fail with an unrelated message and a
        # negative one silently yields no chunks, so reject both up front
        raise ValueError("minutes must correspond to at least one millisecond")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]
    return _transcribe_chunks(chunks, "audio-fixed-chunks")


if __name__ == '__main__':
    # sample inputs used in the tutorial:
    # path = "30-4447-0004.wav"
    # path = "7601-291468-0006.wav"
    path = sys.argv[1]
    print("\nFull text:", get_large_audio_transcription_on_silence(path))
    print("="*50)
    print("\nFull text:", get_large_audio_transcription_fixed_interval(path, minutes=1/6))

Ethical Hacking with Python EBook - Resources - Top

New Tutorials

Build an MCP Server in Python with FastMCP

Building an AI-Driven HTTP Security Headers Analyzer with Python

Building an Advanced Audiobook Generator with Python and ElevenLabs TTS

Building a Full-Stack RAG Chatbot with FastAPI, OpenAI, and Streamlit

How to Recover Deleted Files with Python

Movatterモバイル変換

Code for How to Convert Speech to Text in Python Tutorial

Tags

New Tutorials

Popular Tutorials