tarrasyed19472007 committed
Commit b9da6aa · verified · 1 Parent(s): 8d6cd88

Update app.py

Files changed (1)
  1. app.py +48 -72
app.py CHANGED
@@ -1,91 +1,67 @@
- # chatbot_app.py
-
  import streamlit as st
- import openai
  import requests
  from gtts import gTTS
- from transformers import MusicgenForCausalLM, MusicgenConfig, AutoTokenizer
  import os
- from io import BytesIO
- from pydub import AudioSegment
- from pydub.playback import play
  import tempfile

- # Configure API keys
  HUGGING_FACE_API_KEY = "voicebot"
- OPENAI_API_KEY = "Testing API"
- openai.api_key = OPENAI_API_KEY
-
- # Initialize the Hugging Face model and tokenizer
- @st.cache_resource  # Cache the model to avoid reloading on every run
- def load_model():
-     model_name = "facebook/musicgen-small"
-     config = MusicgenConfig.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
-     model = MusicgenForCausalLM.from_pretrained(model_name, config=config, use_auth_token=HUGGING_FACE_API_KEY)
-     tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
-     return model, tokenizer
-
- model, tokenizer = load_model()

- # Function to convert voice to text using OpenAI's Whisper API
- def voice_to_text(audio_file):
-     with open(audio_file, "rb") as file:
-         transcript = openai.Audio.transcribe("whisper-1", file)
-     return transcript["text"]

- # Function to generate chatbot response using Hugging Face's model
- def generate_response(prompt):
-     inputs = tokenizer.encode(prompt, return_tensors="pt")
-     outputs = model.generate(inputs, max_length=100, num_return_sequences=1)
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
      return response

- # Function to convert text to voice using gTTS
  def text_to_speech(text):
-     tts = gTTS(text, lang="en")
-     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-     tts.save(temp_file.name)
-     return temp_file.name
-
- # Streamlit app layout
  st.title("Voice-to-Text Chatbot")
- st.write("Speak to the chatbot and get responses in both text and audio!")
-
- # Upload audio file
- audio_file = st.file_uploader("Upload your voice input", type=["mp3", "wav", "ogg"])
-
- if audio_file is not None:
-     # Convert voice to text
-     with open("input_audio.wav", "wb") as f:
-         f.write(audio_file.read())
-     st.audio("input_audio.wav", format="audio/wav")

-     # Get text from audio
-     with st.spinner("Transcribing your voice..."):
-         user_input = voice_to_text("input_audio.wav")
-     st.write(f"**You said:** {user_input}")
-
-     # Generate chatbot response
-     with st.spinner("Generating response..."):
-         response_text = generate_response(user_input)
-     st.write(f"**Chatbot:** {response_text}")
-
-     # Convert response to audio
-     with st.spinner("Converting response to audio..."):
-         response_audio = text_to_speech(response_text)
-         audio_data = AudioSegment.from_mp3(response_audio)
-
-     # Display audio response
-     st.audio(response_audio, format="audio/mp3")

-     # Play audio
-     play(audio_data)

-     # Clean up temporary files
-     os.remove("input_audio.wav")
-     os.remove(response_audio)
- else:
-     st.write("Please upload an audio file to get started.")
  import streamlit as st
  import requests
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  from gtts import gTTS
  import os
  import tempfile
+ import speech_recognition as sr

+ # Set your Hugging Face API key
  HUGGING_FACE_API_KEY = "voicebot"

+ # Load the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("declare-lab/tango-full")
+ model = AutoModelForCausalLM.from_pretrained("declare-lab/tango-full")

+ # Function to get a response from the chatbot
+ def get_response(input_text):
+     inputs = tokenizer.encode(input_text, return_tensors='pt')
+     response_ids = model.generate(inputs, max_length=50, num_return_sequences=1)
+     response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
      return response

+ # Function to convert text to speech
  def text_to_speech(text):
+     tts = gTTS(text=text, lang='en')
+     with tempfile.NamedTemporaryFile(delete=True) as fp:
+         tts.save(f"{fp.name}.mp3")
+         os.system(f"start {fp.name}.mp3")  # For Windows, use 'open' for macOS
+
+ # Speech Recognition Function
+ def recognize_speech():
+     r = sr.Recognizer()
+     with sr.Microphone() as source:
+         st.write("Listening...")
+         audio = r.listen(source)
+     st.write("Recognizing...")
+     try:
+         text = r.recognize_google(audio)
+         st.success(f"You said: {text}")
+         return text
+     except sr.UnknownValueError:
+         st.error("Sorry, I could not understand the audio.")
+         return None
+     except sr.RequestError:
+         st.error("Could not request results from Google Speech Recognition service.")
+         return None
+
+ # Streamlit Interface
  st.title("Voice-to-Text Chatbot")

+ # Recognize speech
+ if st.button("Speak"):
+     user_input = recognize_speech()
+ else:
+     user_input = st.text_input("Type your message here:")

+ # Display response and convert to speech
+ if user_input:
+     st.write("You: ", user_input)
+     chatbot_response = get_response(user_input)
+     st.write("Chatbot: ", chatbot_response)
+     text_to_speech(chatbot_response)
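
The new recognize_speech() captures input with sr.Microphone(), which relies on PyAudio and a microphone attached to the machine running Streamlit; on a hosted Space there is none. The version being removed read an uploaded file instead, and the two approaches can be combined. The following is a rough sketch only, not part of the commit (upload_and_transcribe is an illustrative name), assuming a WAV upload because speech_recognition's AudioFile reader handles WAV/AIFF/FLAC:

import speech_recognition as sr
import streamlit as st

def upload_and_transcribe():
    # Accept a WAV upload (as the previous app.py did) and transcribe it with
    # speech_recognition, so no server-side microphone is required.
    uploaded = st.file_uploader("Upload your voice input", type=["wav"])
    if uploaded is None:
        return None
    with open("input_audio.wav", "wb") as f:
        f.write(uploaded.read())
    recognizer = sr.Recognizer()
    with sr.AudioFile("input_audio.wav") as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio.")
        return None
    except sr.RequestError:
        st.error("Could not request results from Google Speech Recognition service.")
        return None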
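
Likewise, the new text_to_speech() plays the reply by shelling out to the operating system (start on Windows, open on macOS), so the audio is only heard on the server's own desktop. A minimal sketch that instead streams the gTTS output back through st.audio, so playback happens in the visitor's browser, could look like this (speak_in_browser is an illustrative name, not part of the commit):

import os
import tempfile

import streamlit as st
from gtts import gTTS

def speak_in_browser(text):
    # Synthesize speech with gTTS and hand the MP3 bytes to Streamlit's audio widget.
    tts = gTTS(text=text, lang="en")
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp.close()  # close the handle so gTTS can write to the path on any OS
    tts.save(tmp.name)
    with open(tmp.name, "rb") as f:
        st.audio(f.read(), format="audio/mp3")
    os.remove(tmp.name)  # clean up once the bytes are in the page

Reading the bytes back and deleting the file afterwards keeps the temporary directory clean regardless of platform.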