Runtime error "The write operation timed out" when connecting summarizer code to a Streamlit app (local runtime)

Problem description

I am getting a connection error. I built a Streamlit app (local runtime) and added my summarizer code to it. The summarizer is meant to condense long text via abstractive summarization, but the app repeatedly fails with a timeout error ("The write operation timed out"). I have tried every fix I could think of without success. Please help; my code is below.

# Install dependencies (notebook-style; restart the runtime after reinstalling PyYAML)
!pip install --ignore-installed PyYAML
exit()

!pip install pydub
!pip3 install google-cloud-storage --upgrade
!pip3 install google-cloud-speech --upgrade
!pip install datasets transformers torch

import io
import os
import wave

import moviepy.editor as mp
import torch
from pydub import AudioSegment
from google.cloud import speech_v1 as speech
from google.cloud import storage
from transformers import LEDForConditionalGeneration, LEDTokenizer

# Extract the audio track from the source video
my_clip = mp.VideoFileClip(r"C:/Users/HP/Desktop/Project/Video_Output/testout_simple.mp4")
my_clip.audio.write_audiofile(r"C:/Users/HP/Desktop/Project/my_result7.wav")

filepath = "C:/Users/HP/Desktop/Project/"     # Directory containing the input audio
filename = "my_result7.wav"
output_filepath = "C:/Users/HP/Desktop/Project/Document/" # Directory for the final transcript
bucketname = "test-summary-567" # Name of the GCS bucket created in the previous step
%env GOOGLE_APPLICATION_CREDENTIALS=C:/Users/HP/Desktop/Project/smiling-pact-324809-5103f4e831bf.json
def mp3_to_wav(audio_file_name):
    # Convert an .mp3 to .wav in place; other formats pass through untouched
    if audio_file_name.lower().endswith('.mp3'):
        sound = AudioSegment.from_mp3(audio_file_name)
        audio_file_name = os.path.splitext(audio_file_name)[0] + '.wav'
        sound.export(audio_file_name, format="wav")


def stereo_to_mono(audio_file_name):
    # Collapse a stereo WAV to a single channel in place
    sound = AudioSegment.from_wav(audio_file_name)
    sound = sound.set_channels(1)
    sound.export(audio_file_name, format="wav")

def frame_rate_channel(audio_file_name):
    # Read the sample rate and channel count from the WAV header
    with wave.open(audio_file_name, "rb") as wave_file:
        frame_rate = wave_file.getframerate()
        channels = wave_file.getnchannels()
        return frame_rate, channels


def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)


def delete_blob(bucket_name, blob_name):
    """Deletes a blob from the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)

    blob.delete()


def google_transcribe(audio_file_name):

    file_name = filepath + audio_file_name

    mp3_to_wav(file_name)

    # The name of the audio file to transcribe

    frame_rate, channels = frame_rate_channel(file_name)

    if channels > 1:
        stereo_to_mono(file_name)

    bucket_name = bucketname
    source_file_name = filepath + audio_file_name
    destination_blob_name = audio_file_name

    upload_blob(bucket_name, source_file_name, destination_blob_name)

    gcs_uri = 'gs://' + bucketname + '/' + audio_file_name
    transcript = ''

    client = speech.SpeechClient()
    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=frame_rate,
        language_code='en-US',
    )

    # Detect speech in the audio file (long-running job; block for up to 10000 s)
    operation = client.long_running_recognize(config=config, audio=audio)
    # operation = client.recognize(config=config, audio=audio)
    response = operation.result(timeout=10000)

    for result in response.results:
        transcript += result.alternatives[0].transcript

    #delete_blob(bucket_name, destination_blob_name)
    return transcript

def write_transcripts(transcript_filename, transcript):
    # Persist the transcript so the summarizer can rerun without re-transcribing
    with open(output_filepath + transcript_filename, "w+") as f:
        f.write(transcript)

def transcribe_audio():
    # Use GCP Speech-to-Text to transcribe the audio data
    transcript = google_transcribe(filename)
    transcript_filename = filename.split('.')[0] + '.txt'
    write_transcripts(transcript_filename, transcript)
    return transcript

def generate_summary():
    # Run on GPU when available; fall back to CPU on machines without CUDA
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = LEDTokenizer.from_pretrained("patrickvonplaten/led-large-16384-pubmed")
    model = LEDForConditionalGeneration.from_pretrained(
        "patrickvonplaten/led-large-16384-pubmed", return_dict_in_generate=True
    ).to(device)

    LONG_ARTICLE = transcribe_audio()
    input_ids = tokenizer(LONG_ARTICLE, return_tensors="pt").input_ids.to(device)

    # LED expects global attention on at least the first token
    global_attention_mask = torch.zeros_like(input_ids)
    global_attention_mask[:, 0] = 1

    sequences = model.generate(input_ids, global_attention_mask=global_attention_mask).sequences
    summary = tokenizer.batch_decode(sequences)

    # Swap the model's sentence tags for HTML paragraph tags
    replaced_tags = summary[0].replace("<s>", "<p>").replace("</s>", "</p>")
    print(replaced_tags)
    return replaced_tags


generate_summary()

Tags: python

Solution
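
In this pipeline, "The write operation timed out" almost always points at the Cloud Storage upload rather than at Streamlit or the model: blob.upload_from_filename sends the whole WAV in a single request by default, and on a slow or unstable connection that one long socket write exceeds the client's default timeout. Below is a minimal sketch of a more forgiving upload_blob, assuming a reasonably recent google-cloud-storage release (both the per-blob chunk_size attribute and the timeout argument are standard parts of its API):

from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket in resumable chunks with a longer timeout."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # An explicit chunk_size switches the client to a resumable upload,
    # sending the WAV in 5 MB pieces instead of one long socket write.
    blob.chunk_size = 5 * 1024 * 1024

    # Give each request up to 10 minutes instead of the default 60 seconds.
    blob.upload_from_filename(source_file_name, timeout=600)

With chunked uploads, a network hiccup only retries the current 5 MB piece instead of failing the whole file.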

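A second thing worth checking for the Streamlit side of the question: !pip and %env are IPython magics, so when the same file runs as a plain Streamlit script they never execute and the Storage/Speech clients start without credentials. Setting the variable from Python works in both environments:

import os

# Must run before the first storage.Client() / speech.SpeechClient() call.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "C:/Users/HP/Desktop/Project/smiling-pact-324809-5103f4e831bf.json"
)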

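Finally, for wiring the summarizer into the Streamlit app itself: reloading a led-large checkpoint on every rerun is slow enough to look like a hang, so cache it. A sketch, assuming the code above is saved as summarizer.py (hypothetical module name) and Streamlit >= 1.18 for st.cache_resource:

import streamlit as st
import torch
from transformers import LEDForConditionalGeneration, LEDTokenizer

from summarizer import transcribe_audio  # hypothetical module holding the code above

CHECKPOINT = "patrickvonplaten/led-large-16384-pubmed"

@st.cache_resource  # load the model once and reuse it across reruns
def load_model():
    tokenizer = LEDTokenizer.from_pretrained(CHECKPOINT)
    model = LEDForConditionalGeneration.from_pretrained(
        CHECKPOINT, return_dict_in_generate=True
    )
    return tokenizer, model

st.title("Video summarizer")

if st.button("Transcribe and summarize"):
    tokenizer, model = load_model()
    with st.spinner("Transcribing audio..."):
        transcript = transcribe_audio()
    input_ids = tokenizer(transcript, return_tensors="pt").input_ids
    # LED expects global attention on at least the first token
    global_attention_mask = torch.zeros_like(input_ids)
    global_attention_mask[:, 0] = 1
    sequences = model.generate(
        input_ids, global_attention_mask=global_attention_mask
    ).sequences
    st.write(tokenizer.batch_decode(sequences, skip_special_tokens=True)[0])

Run it with streamlit run app.py; the first click still takes minutes (transcription plus generation), but st.spinner keeps the page responsive while it waits.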