# chatbot_app.py
import os
import tempfile

import openai
import streamlit as st
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
from transformers import AutoModelForCausalLM, AutoTokenizer

# Configure API keys (placeholders -- replace with your own credentials)
HUGGING_FACE_API_KEY = "your_hugging_face_token"
OPENAI_API_KEY = "your_openai_api_key"
openai.api_key = OPENAI_API_KEY


# Initialize the Hugging Face model and tokenizer.
# DialoGPT is a conversational causal LM; the music-generation checkpoint
# referenced originally cannot be loaded with AutoModelForCausalLM.
@st.cache_resource
def load_model():
    model_name = "microsoft/DialoGPT-medium"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
    return model, tokenizer


model, tokenizer = load_model()


# Function to convert voice to text using OpenAI's Whisper API (pre-1.0 openai client)
def voice_to_text(audio_file):
    with open(audio_file, "rb") as file:
        transcript = openai.Audio.transcribe("whisper-1", file)
    return transcript["text"]


# Function to generate a chatbot response using the Hugging Face model
def generate_response(prompt):
    inputs = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_length=100,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response


# Function to convert text to voice using gTTS; returns the path to a temporary MP3 file
def text_to_speech(text):
    tts = gTTS(text, lang="en")
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_file.name)
    return temp_file.name


# Streamlit app layout
st.title("Voice-to-Text Chatbot")
st.write("Speak to the chatbot and get responses in both text and audio!")

# Upload audio file
audio_file = st.file_uploader("Upload your voice input", type=["mp3", "wav", "ogg"])

if audio_file is not None:
    # Save the upload with its original extension so Whisper can detect the format
    suffix = os.path.splitext(audio_file.name)[1] or ".wav"
    input_path = f"input_audio{suffix}"
    with open(input_path, "wb") as f:
        f.write(audio_file.read())

    st.audio(input_path)

    # Get text from audio
    with st.spinner("Transcribing your voice..."):
        user_input = voice_to_text(input_path)
    st.write(f"**You said:** {user_input}")

    # Generate chatbot response
    with st.spinner("Generating response..."):
        response_text = generate_response(user_input)
    st.write(f"**Chatbot:** {response_text}")

    # Convert response to audio
    with st.spinner("Converting response to audio..."):
        response_audio = text_to_speech(response_text)
        audio_data = AudioSegment.from_mp3(response_audio)

    # Display audio response in the browser
    st.audio(response_audio, format="audio/mp3")

    # Also play the audio on the machine running the app (requires ffmpeg/ffplay)
    play(audio_data)

    # Clean up temporary files
    os.remove(input_path)
    os.remove(response_audio)
else:
    st.write("Please upload an audio file to get started.")
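
# ---------------------------------------------------------------------------
# Running the app -- a rough sketch of the setup this script assumes
# (package list inferred from the imports above; adjust versions as needed):
#
#   pip install streamlit "openai<1.0" gtts transformers torch pydub
#   streamlit run chatbot_app.py
#
# Assumptions worth noting:
#   * openai.Audio.transcribe is the pre-1.0 openai Python API; clients on
#     openai>=1.0 would use client.audio.transcriptions.create instead.
#   * pydub playback needs ffmpeg (or simpleaudio) installed on the host, and
#     play() emits sound on the machine running Streamlit, not in the browser;
#     st.audio alone is enough for remote users.
#   * The Hugging Face token is only required for gated or private models;
#     public checkpoints such as DialoGPT load without it.
# ---------------------------------------------------------------------------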