|
import os
import tempfile

import gradio as gr
from gtts import gTTS
from transformers import pipeline
|
|
|
|
|
pipe = pipeline(model="openai/whisper-small") |
|
|
|
|
|
text_pipe = pipeline("text-generation", model="google/gemma-2-9b") |
|
|
|
def transcribe(audio): |
|
|
|
text = pipe(audio)["text"] |
|
|
|
|
|
lm_response = text_pipe(text)[0]["generated_text"] |
|
|
|
|
|
tts = gTTS(lm_response, lang='ko') |
|
|
|
|
|
out_audio = "output_audio.mp3" |
|
tts.save(out_audio) |
|
|
|
return out_audio |
|
|
|
|
|
iface = gr.Interface( |
|
fn=transcribe, |
|
inputs=gr.Audio(type="filepath"), |
|
outputs=gr.Audio(type="filepath"), |
|
title="Whisper Small Glaswegian", |
|
description="Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model." |
|
) |
|
|
|
|
|
iface.launch(share=True) |