Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 497b01b

Browse files
committed
added long speech recognition code to speech to text tutorial
1 parent 35e2998, commit 497b01b

File tree

4 files changed

+63
-1
lines changed

4 files changed

+63
-1
lines changed
Binary file not shown.
Binary file not shown.
Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
# importing libraries
2+
importspeech_recognitionassr
3+
importos
4+
frompydubimportAudioSegment
5+
frompydub.silenceimportsplit_on_silence
6+
7+
# create a speech recognition object
8+
# Kept at module level so get_large_audio_transcription reuses this one instance for every chunk.
r=sr.Recognizer()
9+
10+
# a function that splits the audio file into chunks
11+
# and applies speech recognition
12+
def get_large_audio_transcription(path):
    """
    Transcribe a long WAV file by splitting it on silence.

    The audio at `path` is loaded with pydub and cut into chunks wherever a
    silent stretch is found. Each chunk is written to
    ``audio-chunks/chunk<i>.wav`` (the directory is created on demand), read
    back through the module-level recognizer `r`, and passed to
    `r.recognize_google`.

    Args:
        path: Path of the WAV file to transcribe.

    Returns:
        The recognized chunks concatenated in order, each one capitalized
        and followed by ". ". Chunks that raise `sr.UnknownValueError`
        contribute nothing; an empty string is returned when no chunk
        yields text.

    Raises:
        Only `sr.UnknownValueError` is handled here. Any other exception
        raised while loading, exporting or recognizing audio propagates to
        the caller.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio where silence lasts 500 milliseconds or more
    chunks = split_on_silence(
        sound,
        # experiment with this value for your target audio file
        min_silence_len=500,
        # threshold is 14 dB below the clip's own dBFS level; adjust per requirement
        silence_thresh=sound.dBFS - 14,
        # keep 500 milliseconds of silence around each chunk, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create the directory for the audio chunks; exist_ok avoids the
    # check-then-create race of a separate isdir() test
    os.makedirs(folder_name, exist_ok=True)
    recognized = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk into the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        # export() hands back the output file handle; close it right away so
        # handles do not pile up while iterating over many chunks
        audio_chunk.export(chunk_filename, format="wav").close()
        # load the exported chunk into memory for recognition
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting it to text
        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            # nothing intelligible in this chunk: report it and move on
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            recognized.append(text)
    # return the text for all chunks detected
    return "".join(recognized)
53+
54+
55+
if __name__ == '__main__':
    import sys

    # The WAV file to transcribe is taken from the first command-line
    # argument, e.g. "30-4447-0004.wav" or "7601-291468-0006.wav".
    if len(sys.argv) < 2:
        # exit with a usage hint instead of an IndexError traceback
        sys.exit(f"Usage: python {sys.argv[0]} <path-to-wav-file>")
    path = sys.argv[1]
    print("\nFull text:", get_large_audio_transcription(path))
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
speech_recognition
1+
speech_recognition
2+
pyaudio
3+
pydub

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp