|
from transformers import pipeline |
|
import gradio as gr |
|
from gtts import gTTS |
|
import openai |
|
|
|
|
|
# Speech-to-text pipeline used by transcribe() below.
# NOTE(review): this loads the stock "openai/whisper-small" checkpoint, but the
# UI title/description claim a Glaswegian fine-tune — confirm which model was
# intended and point `model=` at the fine-tuned checkpoint if so.
pipe = pipeline(model="openai/whisper-small")
|
|
|
|
|
|
|
|
|
def generate_gpt_response(text):
    """Send *text* to the OpenAI chat API and return the model's reply.

    Args:
        text: The user prompt (here: a speech transcript).

    Returns:
        The model's reply with surrounding whitespace stripped.

    Raises:
        openai.OpenAIError: On API/auth/network failure.
    """
    # The legacy Completions endpoint (`openai.Completion.create` with
    # engine="text-davinci-003") was removed in openai>=1.0 and the
    # text-davinci-003 model itself was retired in January 2024, so the old
    # call fails on any current install.  Use the Chat Completions API via
    # the module-level convenience client instead.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": text}],
        max_tokens=150,
    )
    return response.choices[0].message.content.strip()
|
|
|
def transcribe(audio):
    """Full voice round-trip: speech in -> text -> LLM reply -> Korean speech out.

    Args:
        audio: Path to the input audio file (Gradio passes a filepath).

    Returns:
        Path to an MP3 file containing the spoken response.
    """
    # Step 1: transcribe the incoming audio with the Whisper pipeline.
    transcript = pipe(audio)["text"]

    # Step 2: get a language-model reply to the transcript.
    reply = generate_gpt_response(transcript)

    # Step 3: synthesize the reply as Korean speech and write it to disk.
    speech = gTTS(reply, lang='ko')
    output_path = "output_audio.mp3"
    speech.save(output_path)

    return output_path
|
|
|
|
|
# Wire the pipeline into a simple Gradio UI: record/upload audio in, get
# synthesized reply audio back.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Whisper Small Glaswegian",
    description=(
        "Realtime demo for Glaswegian speech recognition using a "
        "fine-tuned Whisper small model."
    ),
)

# share=True additionally exposes a temporary public URL alongside the
# local server.
demo.launch(share=True)