|
from transformers import pipeline |
|
import gradio as gr |
|
from gtts import gTTS |
|
import openai |
|
|
|
|
|
# Speech-to-text pipeline used by transcribe() below.
# NOTE(review): this loads the stock "openai/whisper-small" checkpoint, but the
# UI title/description claim a Glaswegian fine-tune — confirm which model was
# intended and point `model=` at the fine-tuned checkpoint if so.
pipe = pipeline(model="openai/whisper-small")
|
|
|
|
|
|
|
|
|
def generate_gpt_response(text):
    """Send *text* to the OpenAI chat API and return the model's reply.

    Args:
        text: The user prompt (here: a speech transcript).

    Returns:
        The model's reply with surrounding whitespace stripped.

    Raises:
        openai.OpenAIError: On API/auth/network failure.
    """
    # The legacy Completions endpoint (`openai.Completion.create` with
    # engine="text-davinci-003") was removed in openai>=1.0 and the
    # text-davinci-003 model itself was retired in January 2024, so the old
    # call fails on any current install.  Use the Chat Completions API via
    # the module-level convenience client instead.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": text}],
        max_tokens=150,
    )
    return response.choices[0].message.content.strip()
|
|
|
def transcribe(audio):
    """Full voice round-trip: speech in -> text -> LLM reply -> Korean speech out.

    Args:
        audio: Path to the input audio file (Gradio passes a filepath).

    Returns:
        Path to an MP3 file containing the spoken response.
    """
    # Step 1: transcribe the incoming audio with the Whisper pipeline.
    transcript = pipe(audio)["text"]

    # Step 2: get a language-model reply to the transcript.
    reply = generate_gpt_response(transcript)

    # Step 3: synthesize the reply as Korean speech and write it to disk.
    speech = gTTS(reply, lang='ko')
    output_path = "output_audio.mp3"
    speech.save(output_path)

    return output_path
|
|
|
|
|
# Wire the pipeline into a simple Gradio UI: record/upload audio in, get
# synthesized reply audio back.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Whisper Small Glaswegian",
    description=(
        "Realtime demo for Glaswegian speech recognition using a "
        "fine-tuned Whisper small model."
    ),
)

# share=True additionally exposes a temporary public URL alongside the
# local server.
demo.launch(share=True)