Spaces:

divakaivan
/

korean_voice_assistant

Running

korean_voice_assistant / app.py

Update app.py

d38a10a verified 8 months ago

1.03 kB

	from transformers import pipeline
	import gradio as gr
	from gtts import gTTS

	# Speech-to-text stage: the Whisper small checkpoint. No task name is
	# passed here, only the model id.
	pipe = pipeline(
	    model="openai/whisper-small",
	)

	# Reply-generation stage: FLAN-T5 base, run as a text2text-generation
	# pipeline over the transcribed text.
	text_pipe = pipeline(
	    task="text2text-generation",
	    model="google/flan-t5-base",
	)

	def transcribe(audio):
	    """Answer a spoken prompt with synthesized Korean speech.

	    The recording is transcribed with the module-level ``pipe`` pipeline,
	    the transcript is fed to ``text_pipe`` to generate a reply, and the
	    reply is rendered to an MP3 file with gTTS using ``lang='ko'``.

	    Args:
	        audio: Path of the recorded audio file, or ``None``/empty when
	            nothing was recorded.

	    Returns:
	        Path of the generated MP3 file, or ``None`` when there is no input
	        audio or the generated reply is blank.
	    """
	    import tempfile

	    # Nothing was recorded/uploaded: there is nothing to transcribe.
	    if not audio:
	        return None

	    # Transcribe the audio to text
	    text = pipe(audio)["text"]

	    # Generate a response from the transcribed text
	    lm_response = text_pipe(text)[0]["generated_text"]

	    # gTTS refuses empty text, so bail out before constructing it.
	    if not lm_response.strip():
	        return None

	    # Convert the response text to speech
	    tts = gTTS(lm_response, lang='ko')

	    # Reserve a unique file per call so that simultaneous requests do not
	    # overwrite each other's audio; the handle is closed before gTTS
	    # writes to the path.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
	        out_audio = tmp.name
	    tts.save(out_audio)

	    return out_audio

	# Create the Gradio interface: audio in (as a file path), audio out (as a
	# file path), with `transcribe` doing the speech -> text -> reply -> speech
	# round trip. The title and description describe what this app actually
	# does; the previous text referred to a Glaswegian speech-recognition demo
	# that this script does not implement.
	iface = gr.Interface(
	    fn=transcribe,
	    inputs=gr.Audio(type="filepath"),
	    outputs=gr.Audio(type="filepath"),
	    title="Korean Voice Assistant",
	    description=(
	        "Speak a prompt: Whisper small transcribes it, FLAN-T5 generates "
	        "a reply, and the reply is read aloud with Korean text-to-speech."
	    ),
	)

	# Launch the interface
	iface.launch(share=True)