python - 在python中使用Azure语音服务读取音频文件并转换为文本,但只有第一句话被转换为文本
问题描述
下面是代码,
import json
import os
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import azure.cognitiveservices.speech as speechsdk
# main(filename): downloads the blob named `filename` from the "test-container"
# container into a local file of the same name, then runs Azure continuous speech
# recognition on that file and prints the list of recognized texts.
# NOTE(review): the indentation of this snippet was lost; the comments below
# assume every statement up to the final print() belongs to main() — confirm.
def main(filename):
container_name="test-container"
print(filename)
# "DefaultEndpoint" looks like a placeholder for a real storage connection string.
blob_service_client = BlobServiceClient.from_connection_string("DefaultEndpoint")
container_client=blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(filename)
# Write the blob's bytes to a local file so the Speech SDK can read it by filename.
with open(filename, "wb") as f:
data = blob_client.download_blob()
data.readinto(f)
speech_key, service_region = "1234567", "eastus"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
audio_input = speechsdk.audio.AudioConfig(filename=filename)
print("Audio Input:-",audio_input)
speech_config.speech_recognition_language="en-US"
speech_config.request_word_level_timestamps()
speech_config.enable_dictation()
# NOTE(review): OutputFormat(1) is presumably the "Detailed" format — confirm against the SDK enum.
speech_config.output_format = speechsdk.OutputFormat(1)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)
print("speech_recognizer:-",speech_recognizer)
#result = speech_recognizer.recognize_once()
# Collects the text of every final recognition result.
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
done = False
# Callback for session_stopped / canceled: stops recognition and sets the `done` flag.
def stop_cb(evt):
#print('CLOSING on {}'.format(evt))
speech_recognizer.stop_continuous_recognition()
# NOTE(review): if `done` above is a local of main(), `global done` binds a
# module-level name instead of that local; `nonlocal done` would be needed.
global done
done= True
#Appends the recognized text to the all_results variable.
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
# NOTE(review): the wait loop below is commented out (and `time` is not among the
# imports shown), so nothing waits for `done` before all_results is printed.
#while not done:
#time.sleep(.5)
print("Printing all results from speech to text:")
print(all_results)
main(filename="test.wav")
从 main 函数调用时的输出(结果为空列表):
test.wav
Audio Input:- <azure.cognitiveservices.speech.audio.AudioConfig object at 0x00000204D72F4E88>
speech_recognizer:- <azure.cognitiveservices.speech.SpeechRecognizer object at 0x00000204D7065148>
[]
预期输出(不使用主函数的输出)
test.wav
Audio Input:- <azure.cognitiveservices.speech.audio.AudioConfig object at 0x00000204D72F4E88>
speech_recognizer:- <azure.cognitiveservices.speech.SpeechRecognizer object at 0x00000204D7065148>
Printing all results from speech to text:
['hi', '', '', 'Uh.', 'A good laugh.', '1487', "OK, OK, I think that's enough.", '']
如果我们不使用 main 函数,现有代码可以完美运行,但是当我使用 main 函数调用它时,我没有得到所需的输出。请在缺少的部分指导我们。
解决方案
如本文中所述,recognize_once_async()(您正在使用的方法)只会识别单句话语:从检测到语音开始,到下一次停顿为止。
据我了解,如果您使用start_continuous_recognition()将满足您的要求。 start 函数将启动并继续处理所有话语,直到您调用 stop 函数。
此方法有很多与之相关的事件,当语音识别过程发生时会触发“已识别”事件。您需要有一个事件处理程序来处理识别和提取文本。您可以参考此处的文章以获取更多信息。
分享一个使用start_continuous_recognition()将音频转换为文本的示例片段。
import azure.cognitiveservices.speech as speechsdk
import time
import datetime
# Transcribes a whole audio file with Azure continuous speech recognition and
# prints the list of recognized utterances.

# Creates an instance of a speech config with specified subscription key and service region.
# Replace with your own subscription key and region identifier from here: https://aka.ms/speech/sdkregion
speech_key, service_region = "YOURSUBSCRIPTIONKEY", "YOURREGION"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

# Creates an audio configuration that points to an audio file.
# Replace with your own audio filename.
audio_filename = "sample.wav"
audio_input = speechsdk.audio.AudioConfig(filename=audio_filename)

# Creates a recognizer with the given settings.
speech_config.speech_recognition_language = "en-US"
speech_config.request_word_level_timestamps()
speech_config.enable_dictation()
# Named enum member instead of the magic number 1.
speech_config.output_format = speechsdk.OutputFormat.Detailed
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

# Text of every final recognition result, in the order the events arrive.
all_results = []


# https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.recognitionresult?view=azure-python
def handle_final_result(evt):
    """Append the recognized text carried by event `evt` to `all_results`."""
    all_results.append(evt.result.text)


# Set to True by stop_cb once the session has stopped or was canceled.
done = False


def stop_cb(evt):
    """Signal the wait loop below to finish when the session stops or is canceled."""
    global done
    print('CLOSING on {}'.format(evt))
    # Only flip the flag here; recognition itself is stopped after the wait loop
    # so the blocking stop call does not run inside an SDK event callback.
    done = True


# Appends the recognized text to the all_results variable.
speech_recognizer.recognized.connect(handle_final_result)

# Connect callbacks to the events fired by the speech recognizer & display the info/status.
# Ref: https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.eventsignal?view=azure-python
speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

# Finish waiting on either session stopped or canceled events.
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)

speech_recognizer.start_continuous_recognition()
# start_continuous_recognition() returns immediately; poll until a stop event arrives.
while not done:
    time.sleep(.5)
speech_recognizer.stop_continuous_recognition()

print("Printing all results:")
print(all_results)
通过函数调用相同的逻辑
封装在一个函数中并尝试调用它。
只做了少量调整并封装到一个函数中。请确保在回调函数中用 nonlocal(而不是 global)声明变量 done,这样才能修改外层函数中的 done。请检查并告诉我结果。
import azure.cognitiveservices.speech as speechsdk
import time
import datetime
def speech_to_text(audio_filename="whatstheweatherlike.wav", speech_key="<>",
                   service_region="<>", language="en-US"):
    """Transcribe an audio file with Azure continuous speech recognition.

    Args:
        audio_filename: Path of the audio file to transcribe.
        speech_key: Speech resource subscription key. Replace the placeholder
            with your own key.
        service_region: Region identifier of the Speech resource
            (see https://aka.ms/speech/sdkregion).
        language: Recognition language passed to the speech config.

    Returns:
        The list of recognized texts, one entry per final recognition event.
        The same list is also printed.
    """
    # Creates an instance of a speech config with specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Creates an audio configuration that points to an audio file.
    audio_input = speechsdk.audio.AudioConfig(filename=audio_filename)

    # Creates a recognizer with the given settings.
    speech_config.speech_recognition_language = language
    speech_config.request_word_level_timestamps()
    speech_config.enable_dictation()
    # Named enum member instead of the magic number 1.
    speech_config.output_format = speechsdk.OutputFormat.Detailed
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    all_results = []
    done = False

    # https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.recognitionresult?view=azure-python
    def handle_final_result(evt):
        """Append the recognized text carried by event `evt` to `all_results`."""
        all_results.append(evt.result.text)

    def stop_cb(evt):
        """Signal the wait loop to finish when the session stops or is canceled."""
        # `done` lives in the enclosing function, so it must be declared nonlocal
        # (not global) for this assignment to reach it.
        nonlocal done
        print('CLOSING on {}'.format(evt))
        # Only flip the flag here; recognition itself is stopped after the wait
        # loop so the blocking stop call does not run inside an SDK callback.
        done = True

    # Appends the recognized text to the all_results variable.
    speech_recognizer.recognized.connect(handle_final_result)

    # Connect callbacks to the events fired by the speech recognizer & display the info/status.
    # Ref: https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.eventsignal?view=azure-python
    speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # Finish waiting on either session stopped or canceled events.
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    # start_continuous_recognition() returns immediately; poll until a stop event arrives.
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()

    print("Printing all results:")
    print(all_results)
    return all_results
#calling the conversion through a function
speech_to_text()
推荐阅读
- javascript - 从数组的各个部分计算减少总数?
- amazon-web-services - AWS cognito 用户池的 EmailConfiguration
- php - laravel 6 + elasticsearch-php 7.6 + xampp:在您的集群中找不到活动节点
- java - 为@ApiResponse 设置自定义示例
- c# - 使用 NUnit 模拟 EF 的 ExecuteSqlCommand
- python - 谁能建议一种更快的方法来创建这个数组?
- swift - 如何在 SwiftUI 中制作时区选择器?
- azure - Kusto 查询:根据时间检索最近的 2 次运行并进行汇总
- javascript - 根据条件更改 Bootstrap 弹出框
- ios - iPad 上的 SwiftUI 弹出框随设备旋转而移动