|
|
|
import os |
|
import gradio as gr |
|
from groq import Groq |
|
from gtts import gTTS |
|
import tempfile |
|
import whisper |
|
|
|
|
|
|
|
|
|
# SECURITY: never hard-code API keys in source — the key previously embedded
# here must be treated as compromised and rotated. Read it from the
# environment instead (set GROQ_API_KEY before launching the app).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Single Groq client reused for all chat-completion calls below.
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper "base" model once at import time; loading per request
# would dominate latency.
whisper_model = whisper.load_model("base")
|
|
|
|
|
def audio_to_text(audio_file):
    """Transcribe the audio at *audio_file* with the module-level Whisper model.

    The clip is padded or trimmed to Whisper's fixed 30-second window before
    decoding, so longer recordings are truncated to their first 30 seconds.
    """
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_file))
    spectrogram = whisper.log_mel_spectrogram(waveform).to(whisper_model.device)
    # fp16=False keeps decoding in full precision (needed when running on CPU).
    decoding = whisper.decode(whisper_model, spectrogram, whisper.DecodingOptions(fp16=False))
    return decoding.text
|
|
|
|
|
def interact_with_groq(user_input):
    """Send *user_input* as a single user message to the Groq chat API.

    Returns the model's reply text; on any API failure, returns an error
    string rather than raising so the UI can display it.
    """
    try:
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": user_input}],
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error interacting with Groq API: {e}"
|
|
|
|
|
def text_to_audio(response_text):
    """Synthesize *response_text* with gTTS and return the path to an MP3 file.

    The file is created with ``delete=False`` semantics: it persists after
    this call and the caller (here, Gradio) is responsible for cleanup.
    """
    # mkstemp returns an open OS-level handle; close it before gTTS writes.
    # The original NamedTemporaryFile(delete=False, ...).name relied on GC
    # closing the handle in time — on Windows the still-open handle locks
    # the file and tts.save() would fail.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts = gTTS(response_text)
    tts.save(output_path)
    return output_path
|
|
|
|
|
def voice_to_voice(audio_file):
    """Full pipeline: recorded speech in, transcript plus spoken reply out.

    Returns ``(transcribed_text, response_audio_path)``; on any failure
    returns ``(error_message, None)`` so Gradio can still render both
    output components.
    """
    try:
        print("Transcribing audio...")
        transcribed_text = audio_to_text(audio_file)
        print(f"Transcribed Text: {transcribed_text}")

        print("Getting LLM response...")
        response_text = interact_with_groq(transcribed_text)
        print(f"LLM Response: {response_text}")

        print("Generating audio response...")
        return transcribed_text, text_to_audio(response_text)
    except Exception as e:
        # Broad catch is the UI boundary: surface the error as text output.
        return f"Error processing request: {e}", None
|
|
|
|
|
# Gradio UI: one microphone/file audio input; the transcript and the
# synthesized reply come back as two separate output components.
transcript_output = gr.Textbox(label="Transcribed Text")
reply_output = gr.Audio(label="Response Audio")

interface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath"),
    outputs=[transcript_output, reply_output],
    title="Real-Time Voice-to-Voice Chatbot",
    description="A real-time voice-to-voice chatbot using Whisper for transcription, Groq API for LLM, and gTTS for audio response.",
)
|
|
|
|
|
# Start the Gradio web server only when run as a script (not on import).
if __name__ == "__main__":

    interface.launch()
|
|