from transformers import pipeline
import gradio as gr
from gtts import gTTS
from openai import OpenAI
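
# Note: the OpenAI client reads its API key from the OPENAI_API_KEY environment variable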
client = OpenAI()
# Load the Whisper model for speech-to-text
pipe = pipeline(model="openai/whisper-small")
# Load the text generation model
# text_pipe = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_gpt_response(text):
    # Send the transcribed text to the chat model and return its reply
    response = client.chat.completions.create(
        model='gpt-3.5-turbo-0125',
        messages=[{"role": "user", "content": text}]
    )
    return response.choices[0].message.content

def transcribe(audio):
    # Transcribe the audio to text
    text = pipe(audio)["text"]
    # Generate a response from the transcribed text
    # lm_response = text_pipe(text)[0]["generated_text"]
    lm_response = generate_gpt_response(text)
    # Convert the response text to speech (Korean voice, per lang='ko')
    tts = gTTS(lm_response, lang='ko')
    # Save the generated audio and return its file path
    out_audio = "output_audio.mp3"
    tts.save(out_audio)
    return out_audio

# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Whisper Small Glaswegian",
    description="Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model."
)

# Launch the interface
iface.launch(share=True)