divakaivan's picture
Update app.py
d38a10a verified
raw
history blame
1.03 kB
import tempfile

import gradio as gr
from gtts import gTTS
from transformers import pipeline
# Hugging Face model identifiers used by this app.
ASR_MODEL_ID = "openai/whisper-small"
LM_MODEL_ID = "google/flan-t5-base"

# Speech-to-text pipeline (Whisper); the task is left for `pipeline` to
# infer from the model id.
pipe = pipeline(model=ASR_MODEL_ID)

# Text-to-text generation pipeline that produces a reply to the transcript.
text_pipe = pipeline("text2text-generation", model=LM_MODEL_ID)
def transcribe(audio):
    """Turn a spoken prompt into a spoken reply.

    The recording is transcribed with the module-level ``pipe`` pipeline,
    the transcript is passed to ``text_pipe`` to generate a reply, and the
    reply is synthesised to an MP3 file with gTTS.

    Args:
        audio: Path of the recorded/uploaded audio file, or ``None`` (or an
            empty string) when the form was submitted without audio.

    Returns:
        Path of the generated MP3 file, or ``None`` when there is no input
        audio or the generated reply is blank (nothing to speak).
    """
    # Submitting the form without a recording yields no path; bail out
    # instead of letting the speech-recognition pipeline fail on it.
    if not audio:
        return None

    # Transcribe the audio to text
    text = pipe(audio)["text"]

    # Generate a response from the transcribed text
    lm_response = text_pipe(text)[0]["generated_text"]

    # gTTS raises on empty/whitespace-only text, so there is nothing to play.
    if not lm_response or not lm_response.strip():
        return None

    # Convert the response text to speech
    # NOTE(review): the voice language is Korean ('ko') while the UI text
    # talks about Glaswegian speech — confirm this is intended.
    tts = gTTS(lm_response, lang='ko')

    # Write each reply to its own temporary file: a single fixed filename
    # would be overwritten by concurrent requests, so one user could be
    # served another user's audio. The file is closed before gTTS writes to
    # it and is intentionally not deleted here because the caller still has
    # to read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        out_audio = tmp.name
    tts.save(out_audio)
    return out_audio
# Describe the Gradio UI: one audio input routed through `transcribe` to one
# audio output, both exchanged as file paths.
interface_options = {
    "fn": transcribe,
    "inputs": gr.Audio(type="filepath"),
    "outputs": gr.Audio(type="filepath"),
    "title": "Whisper Small Glaswegian",
    "description": "Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model.",
}
iface = gr.Interface(**interface_options)

# Start serving the app, requesting a public share link.
iface.launch(share=True)