voicebot / app.py
tarrasyed19472007's picture
Create app.py
55137ed verified
raw
history blame
2.84 kB
# app.py - Streamlit voice chatbot: Whisper transcription, Hugging Face text generation, gTTS speech output
import streamlit as st
import openai
import requests
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
from io import BytesIO
from pydub import AudioSegment
from pydub.playback import play
import tempfile
# Configure API keys
# Credentials come from the environment so that no secret is committed with
# the source. An unset or empty variable becomes None: for the Hugging Face
# Hub that means anonymous access; for OpenAI the client will report a missing
# key when the first request is made.
HUGGING_FACE_API_KEY = os.environ.get("HUGGING_FACE_API_KEY") or None
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or None
openai.api_key = OPENAI_API_KEY
# Initialize the Hugging Face model and tokenizer
@st.cache_resource
def load_model(model_name="facebook/musicgen-small"):
    """Load a causal-LM checkpoint and its tokenizer from the Hugging Face Hub.

    Streamlit re-executes this script on every user interaction, so the
    result is cached with ``st.cache_resource``: the checkpoint is loaded
    once per server process and the same objects are reused afterwards.

    Args:
        model_name: Hub repository id of the checkpoint to load.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # NOTE(review): "facebook/musicgen-small" is published as a text-to-music
    # checkpoint; confirm it is really the model intended for text chat.
    # `token` is the current name of the deprecated `use_auth_token` argument.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGING_FACE_API_KEY)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=HUGGING_FACE_API_KEY)
    return model, tokenizer
# Load the model/tokenizer pair at module level; generate_response() below
# reads these two globals.
model, tokenizer = load_model()
# Function to convert voice to text using OpenAI's Whisper API
def voice_to_text(audio_file):
    """Transcribe the audio stored at path ``audio_file`` and return the text.

    The file is opened in binary mode and handed to
    ``openai.Audio.transcribe`` with the "whisper-1" model; the "text" entry
    of the returned transcript is the result.
    """
    with open(audio_file, "rb") as audio_stream:
        whisper_result = openai.Audio.transcribe("whisper-1", audio_stream)
    return whisper_result["text"]
# Function to generate chatbot response using Hugging Face's model
def generate_response(prompt, max_new_tokens=100):
    """Generate a reply to ``prompt`` with the module-level model/tokenizer.

    Args:
        prompt: The user's text.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt. (A ``max_length`` limit would count the prompt tokens
            too, leaving little or no room for a reply to a long prompt.)

    Returns:
        The decoded continuation with the echoed prompt removed. If the model
        produced no new text, the full decoded sequence is returned instead so
        that a non-empty prompt never yields an empty reply.
    """
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    prompt_length = inputs.shape[-1]
    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )
    # A causal LM returns prompt tokens followed by the continuation; decode
    # only the continuation so the bot does not repeat the user's words back.
    continuation = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()
    if continuation:
        return continuation
    # Nothing new was generated: fall back to the whole sequence.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Function to convert text to voice using gTTS
def text_to_speech(text):
    """Synthesize ``text`` as English speech and return the path of the MP3.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS`` raises while building or saving the speech; the
        temporary file is removed before the exception propagates.
    """
    # Build the gTTS object first so invalid input fails before a file exists.
    tts = gTTS(text, lang="en")

    # Reserve a unique path, then close our own handle before gTTS writes to
    # it: the open handle would otherwise leak, and on Windows a file that is
    # still open cannot be reopened by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name

    try:
        tts.save(mp3_path)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        os.remove(mp3_path)
        raise
    return mp3_path
# Streamlit app layout
st.title("Voice-to-Text Chatbot")
st.write("Speak to the chatbot and get responses in both text and audio!")

# Upload audio file
audio_file = st.file_uploader("Upload your voice input", type=["mp3", "wav", "ogg"])

if audio_file is not None:
    # Keep the upload's real extension and MIME type: the uploader accepts
    # mp3/wav/ogg, so labelling everything as WAV would be wrong.
    upload_suffix = os.path.splitext(audio_file.name)[1].lower() or ".wav"
    audio_bytes = audio_file.getvalue()

    input_path = None
    response_audio = None
    try:
        # A unique temp file per run, so concurrent sessions cannot overwrite
        # each other's upload the way a fixed file name in the CWD would.
        with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as upload_tmp:
            input_path = upload_tmp.name
            upload_tmp.write(audio_bytes)

        # Pass bytes rather than a path: the player then does not depend on a
        # file that is removed in the cleanup below.
        st.audio(audio_bytes, format=audio_file.type or "audio/wav")

        # Get text from audio
        with st.spinner("Transcribing your voice..."):
            user_input = voice_to_text(input_path)
        st.write(f"**You said:** {user_input}")

        # Generate chatbot response
        with st.spinner("Generating response..."):
            response_text = generate_response(user_input)
        st.write(f"**Chatbot:** {response_text}")

        # Convert response to audio
        with st.spinner("Converting response to audio..."):
            response_audio = text_to_speech(response_text)
            with open(response_audio, "rb") as response_stream:
                response_bytes = response_stream.read()

        # Display audio response
        st.audio(response_bytes, format="audio/mp3")

        # Play audio on the machine running the script. This is best-effort:
        # a hosted server usually has no audio device or playback backend, and
        # that must not break a page whose reply has already been rendered.
        try:
            play(AudioSegment.from_mp3(response_audio))
        except Exception as playback_error:
            st.warning(
                "Server-side playback is unavailable; use the audio player above. "
                f"({playback_error})"
            )
    finally:
        # Clean up temporary files, even when a step above failed.
        for leftover in (input_path, response_audio):
            if leftover is not None and os.path.exists(leftover):
                os.remove(leftover)
else:
    st.write("Please upload an audio file to get started.")