Build error
Update app.py
app.py CHANGED
@@ -1,91 +1,67 @@
-# chatbot_app.py
-
 import streamlit as st
-import openai
 import requests
 from gtts import gTTS
-from transformers import MusicgenForCausalLM, MusicgenConfig, AutoTokenizer
 import os
-from io import BytesIO
-from pydub import AudioSegment
-from pydub.playback import play
 import tempfile

-#
 HUGGING_FACE_API_KEY = "voicebot"
-OPENAI_API_KEY = "Testing API"
-openai.api_key = OPENAI_API_KEY
-
-# Initialize the Hugging Face model and tokenizer
-@st.cache_resource  # Cache the model to avoid reloading on every run
-def load_model():
-    model_name = "facebook/musicgen-small"
-    config = MusicgenConfig.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
-    model = MusicgenForCausalLM.from_pretrained(model_name, config=config, use_auth_token=HUGGING_FACE_API_KEY)
-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
-    return model, tokenizer
-
-model, tokenizer = load_model()

-#
-
-
-    transcript = openai.Audio.transcribe("whisper-1", file)
-    return transcript["text"]

-# Function to
-def
-    inputs = tokenizer.encode(
-
-    response = tokenizer.decode(
     return response

-# Function to convert text to
 def text_to_speech(text):
-    tts = gTTS(text, lang=
-
-
-
-
-#
 st.title("Voice-to-Text Chatbot")
-st.write("Speak to the chatbot and get responses in both text and audio!")
-
-# Upload audio file
-audio_file = st.file_uploader("Upload your voice input", type=["mp3", "wav", "ogg"])
-
-if audio_file is not None:
-    # Convert voice to text
-    with open("input_audio.wav", "wb") as f:
-        f.write(audio_file.read())
-    st.audio("input_audio.wav", format="audio/wav")

-
-
-
-
-
-    # Generate chatbot response
-    with st.spinner("Generating response..."):
-        response_text = generate_response(user_input)
-    st.write(f"**Chatbot:** {response_text}")
-
-    # Convert response to audio
-    with st.spinner("Converting response to audio..."):
-        response_audio = text_to_speech(response_text)
-    audio_data = AudioSegment.from_mp3(response_audio)
-
-    # Display audio response
-    st.audio(response_audio, format="audio/mp3")

-
-

-    # Clean up temporary files
-    os.remove("input_audio.wav")
-    os.remove(response_audio)
-else:
-    st.write("Please upload an audio file to get started.")
 import streamlit as st
 import requests
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from gtts import gTTS
 import os
 import tempfile
+import speech_recognition as sr

+# Set your Hugging Face API key
 HUGGING_FACE_API_KEY = "voicebot"

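The "voicebot" placeholder is not a real token, and nothing below actually uses it; the models being loaded are public. If a token ever becomes necessary, a safer sketch is to read it from the Space's secrets rather than hard-coding it (HF_TOKEN is an assumed secret name, not something this repo defines):

    import os

    # Assumption: a secret named HF_TOKEN is configured in the Space settings.
    HUGGING_FACE_API_KEY = os.environ.get("HF_TOKEN", "")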
+# Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("declare-lab/tango-full")
+model = AutoModelForCausalLM.from_pretrained("declare-lab/tango-full")

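declare-lab/tango-full is a text-to-audio generation model, not a causal language model, so loading it through AutoModelForCausalLM is likely to fail at startup and may well be behind the "Build error" status above. A minimal sketch of a loader that matches the class, assuming any small chat-capable causal LM is acceptable (microsoft/DialoGPT-small is only an illustrative choice), and restoring the st.cache_resource caching the previous version had:

    @st.cache_resource  # cache the weights across Streamlit reruns
    def load_chat_model(name: str = "microsoft/DialoGPT-small"):
        # Any causal LM fits here; DialoGPT-small is just an example.
        tok = AutoTokenizer.from_pretrained(name)
        mdl = AutoModelForCausalLM.from_pretrained(name)
        return tok, mdl

    tokenizer, model = load_chat_model()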
+# Function to get a response from the chatbot
+def get_response(input_text):
+    inputs = tokenizer.encode(input_text, return_tensors='pt')
+    response_ids = model.generate(inputs, max_length=50, num_return_sequences=1)
+    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
     return response

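As written, generate() returns the prompt plus the continuation, so the decoded reply repeats the user's text, and max_length=50 counts the prompt's tokens against the reply. A sketch that bounds only the new tokens and strips the echoed prompt; the eos/pad handling assumes a DialoGPT-style tokenizer:

    def get_response(input_text):
        # Append EOS so dialogue models see an end-of-turn marker.
        inputs = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors="pt")
        output_ids = model.generate(
            inputs,
            max_new_tokens=50,                    # bound the reply, not prompt+reply
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad warning
        )
        # Decode only the newly generated tokens.
        return tokenizer.decode(output_ids[0, inputs.shape[-1]:], skip_special_tokens=True)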
+# Function to convert text to speech
 def text_to_speech(text):
+    tts = gTTS(text=text, lang='en')
+    with tempfile.NamedTemporaryFile(delete=True) as fp:
+        tts.save(f"{fp.name}.mp3")
+        os.system(f"start {fp.name}.mp3")  # For Windows, use 'open' for macOS
+
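Two problems here: tts.save(f"{fp.name}.mp3") writes to a path the NamedTemporaryFile context never deletes (fp.name itself stays empty), and os.system("start ...") launches a player on the server, and only on Windows, so a visitor to a hosted Space hears nothing. A sketch that renders the MP3 in memory and lets the browser play it:

    from io import BytesIO

    def text_to_speech(text):
        # Render speech into an in-memory buffer; st.audio accepts file-like objects.
        buf = BytesIO()
        gTTS(text=text, lang="en").write_to_fp(buf)
        buf.seek(0)
        return buf

    # usage: st.audio(text_to_speech(chatbot_response), format="audio/mp3")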
+# Speech Recognition Function
+def recognize_speech():
+    r = sr.Recognizer()
+    with sr.Microphone() as source:
+        st.write("Listening...")
+        audio = r.listen(source)
+        st.write("Recognizing...")
+    try:
+        text = r.recognize_google(audio)
+        st.success(f"You said: {text}")
+        return text
+    except sr.UnknownValueError:
+        st.error("Sorry, I could not understand the audio.")
+        return None
+    except sr.RequestError:
+        st.error("Could not request results from Google Speech Recognition service.")
+        return None
+
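sr.Microphone() records on the machine running the app, and a hosted Space has no server-side microphone; it also drags in PyAudio, whose missing portaudio system package is a classic cause of Space build failures. A sketch that keeps Google recognition but feeds it an uploaded WAV instead, closer to the file-uploader flow the previous version used:

    def recognize_upload(uploaded_wav):
        # sr.AudioFile accepts file-like objects, e.g. from st.file_uploader.
        r = sr.Recognizer()
        with sr.AudioFile(uploaded_wav) as source:
            audio = r.record(source)
        try:
            return r.recognize_google(audio)
        except (sr.UnknownValueError, sr.RequestError):
            st.error("Could not transcribe the uploaded audio.")
            return None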
+# Streamlit Interface
 st.title("Voice-to-Text Chatbot")

+# Recognize speech
+if st.button("Speak"):
+    user_input = recognize_speech()
+else:
+    user_input = st.text_input("Type your message here:")

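st.button returns True only during the rerun in which it is clicked, so any recognized text is lost on the next interaction, and the text box vanishes whenever the button fires. A sketch that shows both controls at once and keeps the last utterance in st.session_state:

    if "user_input" not in st.session_state:
        st.session_state.user_input = ""

    typed = st.text_input("Type your message here:")
    if st.button("Speak"):
        spoken = recognize_speech()
        if spoken:
            st.session_state.user_input = spoken
    elif typed:
        st.session_state.user_input = typed

    user_input = st.session_state.user_input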
+# Display response and convert to speech
+if user_input:
+    st.write("You: ", user_input)
+    chatbot_response = get_response(user_input)
+    st.write("Chatbot: ", chatbot_response)
+    text_to_speech(chatbot_response)
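For the Space to build, every import above has to be declared as a dependency; a requirements file that omits one of them, or pins PyAudio, is the usual suspect for the "Build error" badge. A hypothetical requirements.txt, since the repo's actual one is not part of this diff:

    streamlit
    requests
    torch
    transformers
    gTTS
    SpeechRecognition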