Movatterモバイル変換


[0]ホーム

URL:



Code for How to Convert Speech to Text in Python Tutorial


View on GitHub

recognizer.py

"""Transcribe one audio file and print the recognized text.

Usage: python recognizer.py <audio-file>

The file named on the command line is loaded in full with
`Recognizer.record` and passed to `Recognizer.recognize_google`.
"""
import sys

import speech_recognition as sr

# a missing argument would otherwise surface as a bare IndexError
if len(sys.argv) < 2:
    sys.exit(f"Usage: python {sys.argv[0]} <audio-file>")

# read filename from arguments
filename = sys.argv[1]

# initialize the recognizer
r = sr.Recognizer()

# open the file
with sr.AudioFile(filename) as source:
    # listen for the data (load audio to memory)
    audio_data = r.record(source)
    # recognize (convert from speech to text)
    text = r.recognize_google(audio_data)
    print(text)

live_recognizer.py

"""Record from the default microphone and print the recognized text.

Usage: python live_recognizer.py <duration-in-seconds>

The duration argument is parsed with `int()` and forwarded to
`Recognizer.record(source, duration=...)`.
"""
import sys

import speech_recognition as sr

# a missing argument would otherwise surface as a bare IndexError
if len(sys.argv) < 2:
    sys.exit(f"Usage: python {sys.argv[0]} <duration-in-seconds>")

# read duration from the arguments
duration = int(sys.argv[1])

# initialize the recognizer
r = sr.Recognizer()

print("Please talk")
with sr.Microphone() as source:
    # read the audio data from the default microphone
    audio_data = r.record(source, duration=duration)
    print("Recognizing...")
    # convert speech to text
    text = r.recognize_google(audio_data)
    print(text)

long_audio_recognizer.py

"""Transcribe long audio files by recognizing them chunk by chunk.

Two splitting strategies are provided: splitting on silence and splitting
into fixed-length intervals. Every chunk is exported as a WAV file into a
working folder and then passed to `transcribe_audio`.

Usage: python long_audio_recognizer.py <audio-file>
"""
# importing libraries
import os

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
r = sr.Recognizer()


def transcribe_audio(path):
    """Return the text recognized in the audio file at `path`.

    Kept as a separate function so the chunking functions do not repeat
    the open/record/recognize sequence. Exceptions raised by
    `recognize_google` are not handled here; they propagate to the caller.
    """
    # use the audio file as the audio source
    with sr.AudioFile(path) as source:
        audio_listened = r.record(source)
        # convert it to text
        text = r.recognize_google(audio_listened)
    return text


def _transcribe_chunks(chunks, folder_name):
    """Export each chunk to `folder_name` and return the joined transcript.

    Chunks are written as `chunk1.wav`, `chunk2.wav`, ... (1-based). A chunk
    for which recognition raises `sr.UnknownValueError` is reported on
    stdout and skipped; any other exception propagates.
    """
    # create a directory to store the audio chunks
    os.makedirs(folder_name, exist_ok=True)
    pieces = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            pieces.append(text)
    return "".join(pieces)


def get_large_audio_transcription_on_silence(path):
    """Split the audio file at `path` on silence and transcribe each chunk.

    Chunk files are written to the `audio-chunks` folder. Returns the text
    of all recognized chunks concatenated in order.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split audio where silence is 500 milliseconds or more and get chunks
    chunks = split_on_silence(
        sound,
        # experiment with this value for your target audio file
        min_silence_len=500,
        # adjust this per requirement
        silence_thresh=sound.dBFS - 14,
        # keep 500 ms of silence around each chunk, adjustable as well
        keep_silence=500,
    )
    return _transcribe_chunks(chunks, "audio-chunks")


def get_large_audio_transcription_fixed_interval(path, minutes=5):
    """Split the audio file at `path` into fixed-length chunks and transcribe.

    `minutes` is the length of each chunk and may be fractional. Chunk files
    are written to the `audio-fixed-chunks` folder. Returns the text of all
    recognized chunks concatenated in order.

    Raises:
        ValueError: if `minutes` yields a chunk shorter than 1 millisecond.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # convert the chunk length to milliseconds
    chunk_length_ms = int(1000 * 60 * minutes)
    # a zero step would fail inside range() with an unrelated-looking message
    if chunk_length_ms <= 0:
        raise ValueError("minutes must give a chunk length of at least 1 ms")
    # split the audio file into chunks
    chunks = [
        sound[i:i + chunk_length_ms]
        for i in range(0, len(sound), chunk_length_ms)
    ]
    return _transcribe_chunks(chunks, "audio-fixed-chunks")


if __name__ == '__main__':
    import sys

    # a missing argument would otherwise surface as a bare IndexError
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} <audio-file>")
    path = sys.argv[1]
    print("\nFull text:", get_large_audio_transcription_on_silence(path))
    print("="*50)
    # minutes=1/6 gives 10-second chunks
    print("\nFull text:", get_large_audio_transcription_fixed_interval(path, minutes=1/6))

Ethical Hacking with Python EBook - Resources - Top


Join 50,000+ Python Programmers & Enthusiasts like you!



Tags

Mastering YOLO - Tutorials - Middle


New Tutorials

Popular Tutorials


Practical Python PDF Processing EBook - Tutorials - Bottom







[8]ページ先頭

©2009-2025 Movatter.jp