Spaces:
Build error
Build error
# chatbot_app.py
import streamlit as st | |
import openai | |
import requests | |
from gtts import gTTS | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import os | |
from io import BytesIO | |
from pydub import AudioSegment | |
from pydub.playback import play | |
import tempfile | |
# Configure API keys.
# Credentials are looked up in the environment first so real secrets never
# have to be committed to the repository. The original placeholder strings
# are kept as fallbacks, so behavior is unchanged when the variables are unset.
HUGGING_FACE_API_KEY = os.environ.get("HUGGING_FACE_API_KEY", "voicebot")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")
openai.api_key = OPENAI_API_KEY
# Initialize the Hugging Face model and tokenizer
@st.cache_resource
def load_model():
    """Load the Hugging Face model and its tokenizer.

    Streamlit re-executes this script from the top on every user
    interaction; caching the loaded objects as a shared resource means the
    weights are loaded once per server process instead of once per rerun.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    # NOTE(review): this checkpoint name refers to MusicGen, which is
    # presumably a text-to-music model rather than a conversational one --
    # confirm this is the intended model for a chatbot.
    model_name = "facebook/musicgen-small"
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, use_auth_token=HUGGING_FACE_API_KEY
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name, use_auth_token=HUGGING_FACE_API_KEY
    )
    return model, tokenizer


# Module-level handles used by generate_response().
model, tokenizer = load_model()
# Function to convert voice to text using OpenAI's Whisper API
def voice_to_text(audio_file):
    """Transcribe a recorded audio file and return the recognized text.

    Args:
        audio_file: Path of the audio file on disk; it is opened in binary
            mode and handed to the "whisper-1" transcription endpoint.

    Returns:
        The value stored under the ``"text"`` key of the transcription result.
    """
    with open(audio_file, "rb") as audio_stream:
        result = openai.Audio.transcribe("whisper-1", audio_stream)
        return result["text"]
# Function to generate chatbot response using Hugging Face's model
def generate_response(prompt):
    """Generate the chatbot's reply to *prompt*.

    Args:
        prompt: The user's message as plain text.

    Returns:
        The decoded continuation produced by the module-level ``model``,
        with special tokens removed and without the prompt itself.
    """
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    prompt_length = inputs.shape[-1]

    # Budget the *new* tokens rather than the total length: with a total
    # length cap, a prompt near or above the cap leaves no room for a reply.
    outputs = model.generate(inputs, max_new_tokens=100, num_return_sequences=1)

    # A causal LM returns prompt + continuation; drop the prompt tokens so
    # the reply does not repeat what the user just said.
    reply_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True)
# Function to convert text to voice using gTTS
def text_to_speech(text):
    """Synthesize *text* to an MP3 file and return the file's path.

    Args:
        text: The English text to speak.

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is not
        deleted automatically; the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS`` raises while building or saving the speech; if
        saving fails, the temporary file is removed before re-raising.
    """
    tts = gTTS(text, lang="en")

    # Only a unique path is needed here. Close the handle right away so no
    # file descriptor is leaked and so the path can be reopened for writing
    # (and later deleted) on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name

    try:
        tts.save(mp3_path)
    except Exception:
        # Do not leave an empty or partial file behind on failure.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
        raise
    return mp3_path
# Streamlit app layout
st.title("Voice-to-Text Chatbot")
st.write("Speak to the chatbot and get responses in both text and audio!")

# Upload audio file
audio_file = st.file_uploader("Upload your voice input", type=["mp3", "wav", "ogg"])

if audio_file is not None:
    # Store the upload in a per-request temporary file that keeps the real
    # extension. A fixed shared name would let concurrent sessions overwrite
    # each other's audio and would mislabel mp3/ogg uploads as wav.
    upload_suffix = os.path.splitext(audio_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as upload_tmp:
        upload_tmp.write(audio_file.read())
    input_path = upload_tmp.name
    response_audio = None

    try:
        st.audio(input_path, format=audio_file.type or "audio/wav")

        # Get text from audio
        with st.spinner("Transcribing your voice..."):
            user_input = voice_to_text(input_path)
        st.write(f"**You said:** {user_input}")

        # Generate chatbot response
        with st.spinner("Generating response..."):
            response_text = generate_response(user_input)
        st.write(f"**Chatbot:** {response_text}")

        # Convert response to audio
        with st.spinner("Converting response to audio..."):
            response_audio = text_to_speech(response_text)
            audio_data = AudioSegment.from_mp3(response_audio)

        # Display audio response
        st.audio(response_audio, format="audio/mp3")

        # Play audio
        # NOTE(review): this presumably plays on the machine running the
        # script, not in the visitor's browser -- confirm it is wanted on a
        # hosted deployment.
        play(audio_data)
    finally:
        # Clean up temporary files even when a step above raised.
        for leftover in (input_path, response_audio):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
else:
    st.write("Please upload an audio file to get started.")