Spaces:

divakaivan
/

korean_voice_assistant

Running

File size: 1,020 Bytes

2370acc
 
 
 
 
 
 
 
c0daa4f
2370acc

import tempfile

import gradio as gr
from gtts import gTTS
from transformers import pipeline

# Hugging Face model identifiers for the two stages of the assistant.
ASR_MODEL_ID = "openai/whisper-small"
LM_MODEL_ID = "google/gemma-2-9b"

# Speech-to-text stage. No task is passed here, so the pipeline type is
# left for transformers to determine from the model itself.
pipe = pipeline(model=ASR_MODEL_ID)

# Text-generation stage, fed with the transcript produced by `pipe`.
text_pipe = pipeline(task="text-generation", model=LM_MODEL_ID)

def transcribe(audio):
    """Answer a spoken prompt with synthesized Korean speech.

    The recording is transcribed with the speech-to-text pipeline (``pipe``),
    the transcript is used as the prompt for the text-generation pipeline
    (``text_pipe``), and the generated reply is rendered to an MP3 with gTTS.

    Args:
        audio: Path to the input audio file, or ``None`` when the form was
            submitted without a recording.

    Returns:
        Path to an MP3 file containing the spoken reply, or ``None`` when
        there is nothing to say (no input audio, an empty transcript, or an
        empty generated reply).
    """
    # Nothing was recorded/uploaded: bail out instead of crashing the pipeline.
    if audio is None:
        return None

    # Transcribe the audio to text
    text = pipe(audio)["text"]
    if not text.strip():
        return None

    # Generate a response from the transcribed text. return_full_text=False
    # keeps only the newly generated continuation; by default the pipeline
    # prepends the prompt, which would make the assistant repeat the user's
    # own words before answering.
    lm_response = text_pipe(text, return_full_text=False)[0]["generated_text"]
    if not lm_response.strip():
        # gTTS refuses to synthesize empty text, so there is nothing to return.
        return None

    # Convert the response text to speech
    tts = gTTS(lm_response, lang='ko')

    # Save the generated audio to a unique file per request so that
    # concurrent requests cannot overwrite each other's output. The handle is
    # closed before gTTS writes to the path (required on Windows).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        out_audio = tmp.name
    tts.save(out_audio)

    return out_audio

# Create the Gradio interface: one audio clip in (handed to `transcribe` as a
# file path) and one audio clip out (the file path `transcribe` returns).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    # Title and description state what this app actually does: stock Whisper
    # small for transcription, Gemma for the reply, Korean text-to-speech.
    title="Korean Voice Assistant",
    description=(
        "Speak a prompt: it is transcribed with Whisper small, continued by "
        "Gemma 2 9B, and the reply is read back in Korean."
    ),
)

# Launch the interface
# NOTE(review): Gradio reportedly ignores share=True when running on Hugging
# Face Spaces (it only matters for local runs) — confirm before relying on it.
iface.launch(share=True)