from transformers import pipeline
import gradio as gr
from gtts import gTTS
from openai import OpenAI
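# Dependencies (assumed to be pinned in the Space's requirements.txt): transformers, torch, gradio, gTTS, openai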
# Load the Whisper model for speech-to-text
pipe = pipeline(model="openai/whisper-small")
# Load the text generation model
# text_pipe = pipeline("text2text-generation", model="google/flan-t5-base")
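# Send the transcribed text to the OpenAI chat completions API and return the model's reply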
def generate_gpt_response(text, api_key):
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model='gpt-3.5-turbo-0125',
        messages=[{"role": "user", "content": text}]
    )
    return response.choices[0].message.content
def transcribe(audio, api_key):
    # Transcribe the audio to text
    text = pipe(audio)["text"]

    # Generate a response from the transcribed text
    # lm_response = text_pipe(text)[0]["generated_text"]
    lm_response = generate_gpt_response(text, api_key)

    # Convert the response text to speech
    tts = gTTS(lm_response, lang='ko')

    # Save the generated audio
    out_audio = "output_audio.mp3"
    tts.save(out_audio)
    return out_audio
# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Textbox(label="OpenAI API Key", type="password")  # Add a textbox for the API key
    ],
    outputs=gr.Audio(type="filepath"),
    title="Whisper Small Glaswegian",
    description="Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model."
)
# Launch the interface
iface.launch(share=True)