Update src/streamlit_app.py
src/streamlit_app.py (CHANGED: +252, -177)
@@ -1,217 +1,292 @@
 import streamlit as st
 from openai import OpenAI
-import sounddevice as sd
-import scipy.io.wavfile
 import io
 import base64
 import os
-import
+import tempfile
+from audio_recorder_streamlit import audio_recorder

-st.set_page_config(
+# Page configuration
+st.set_page_config(
+    page_title="Voice Bot",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)

 # Configuration
-SAMPLE_RATE = 44100
-RECORD_DURATION = 5
 TEMP_AUDIO_FILE = "temp_audio.wav"

 # Initialize OpenAI client
-st.session_state
-st.session_state
+@st.cache_resource
+def init_openai_client():
+    try:
+        # Try to get API key from Streamlit secrets first (for HF Spaces)
+        api_key = st.secrets.get("OPENAI_API_KEY", None)
+        if not api_key:
+            # Fallback to environment variable
+            api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not api_key:
+            st.error("⚠️ OpenAI API key not found. Please add OPENAI_API_KEY to your Hugging Face Spaces secrets.")
+            st.info("Go to Settings → Repository secrets → Add OPENAI_API_KEY")
+            st.stop()
+
+        return OpenAI(api_key=api_key)
+    except Exception as e:
+        st.error(f"Error initializing OpenAI client: {str(e)}")
+        st.stop()
+
+client = init_openai_client()
+
+# Initialize session state variables
+def init_session_state():
+    if 'conversation_history' not in st.session_state:
+        st.session_state.conversation_history = []
+    if 'context' not in st.session_state:
+        st.session_state.context = load_context()
+    if 'processing' not in st.session_state:
+        st.session_state.processing = False

 def load_context():
-    """Load the context from file."""
+    """Load the context from file or return default."""
     try:
         base_dir = os.path.dirname(os.path.abspath(__file__))
         context_path = os.path.join(base_dir, 'context.txt')

-    except
-        st.error("
-        return ""
+        if os.path.exists(context_path):
+            with open(context_path, "r", encoding='utf-8') as f:
+                return f.read().strip()
+        else:
+            # Default context if file doesn't exist
+            return """I am Prakhar. I can help you with general questions and conversations.
+I aim to be helpful, harmless, and honest in all my interactions."""
+
+    except Exception as e:
+        st.error(f"Error loading context: {str(e)}")
+        return "I am Prakhar, an AI assistant."
+
+def save_context(context_text):
+    """Save context to file."""
+    try:
+        base_dir = os.path.dirname(os.path.abspath(__file__))
+        context_path = os.path.join(base_dir, 'context.txt')
+
+        with open(context_path, "w", encoding='utf-8') as f:
+            f.write(context_text)
+        return True
+    except Exception as e:
+        st.error(f"Error saving context: {str(e)}")
+        return False

-def transcribe_audio(
+def transcribe_audio(audio_bytes):
     """Transcribe audio using Whisper API."""
-    )
+    try:
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+            tmp_file.write(audio_bytes)
+            tmp_file_path = tmp_file.name
+
+        # Transcribe using OpenAI Whisper
+        with open(tmp_file_path, "rb") as audio_file:
+            transcript = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_file,
+                language="en"
+            )
+
+        # Clean up temporary file
+        os.unlink(tmp_file_path)
+
+        return transcript.text.strip()
+
+    except Exception as e:
+        st.error(f"Error transcribing audio: {str(e)}")
+        return None

 def get_ai_response(user_text, context):
     """Get AI response using GPT-4."""
-You must respond **only using the following context**:
+    try:
+        system_prompt = f"""You are Prakhar. You should respond naturally and helpfully.
+
+Context about you:
+{context}
+
+Instructions:
+- Use the context above to inform your responses
+- If asked about something not covered in the context, you can use your general knowledge
+- If you're not sure about something specific to your context, say "I'm not sure about that based on what I know about myself"
+- Keep responses conversational and natural
+- Be helpful and engaging"""
+
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_text}
+            ],
+            max_tokens=500,
+            temperature=0.7
+        )
+
+        return response.choices[0].message.content.strip()
+
+    except Exception as e:
+        st.error(f"Error getting AI response: {str(e)}")
+        return "I'm sorry, I encountered an error while processing your request."

 def text_to_speech(text):
     """Convert text to speech using OpenAI TTS."""
-        info_placeholder.empty()
-        st.session_state.recorded_audio = audio_buffer
-
-def handle_recorded_audio(audio_bytes):
-    """Handle the recorded audio data from browser"""
-    audio_buffer = io.BytesIO(base64.b64decode(audio_bytes))
-    st.session_state.recorded_audio = audio_buffer
-    st.session_state.processing = True
-
-def
-    if st.session_state.get('browser_audio'):
-        handle_recorded_audio(st.session_state.browser_audio)
-        st.session_state.browser_audio = None
-
-    with process_placeholder.container():
-        with st.spinner("Processing..."):
-            st.session_state.user_text = transcribe_audio(st.session_state.recorded_audio)
-            st.session_state.ai_reply = get_ai_response(st.session_state.user_text, st.session_state.context)
-            audio_b64 = text_to_speech(st.session_state.ai_reply)
-            st.session_state.ai_audio = audio_b64
-            st.session_state.processing = False
-
-    st.audio(st.session_state.recorded_audio, format="audio/wav")
-    if hasattr(st.session_state, 'ai_audio'):
-        st.audio(f"data:audio/mp3;base64,{st.session_state.ai_audio}", format="audio/mp3")
-
-    with script:
-        st.subheader("Conversation")
-        if st.session_state.user_text is not None:
-            st.markdown("**You said:**")
-            st.markdown(f"{st.session_state.user_text}")
-            st.markdown("**AI Response:**")
-            st.markdown(f"{st.session_state.ai_reply}")
-
-    st.text_area("Context", value=st.session_state.context, height=270, disabled=False)
-    st.markdown("You can update the context in the `context.txt` file.")
-
-# Add JavaScript for audio recording
-def get_audio_recorder_html():
-    return """
-    <script>
-    const audioRecorder = {
-        start: async function() {
-            this.mediaRecorder = null;
-            this.audioChunks = [];
-
-            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-            this.mediaRecorder = new MediaRecorder(stream);
-
-            this.mediaRecorder.ondataavailable = (e) => {
-                if (e.data.size > 0) this.audioChunks.push(e.data);
-            };
-
-            this.mediaRecorder.onstop = () => {
-                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
-                const reader = new FileReader();
-                reader.readAsDataURL(audioBlob);
-                reader.onloadend = () => {
-                    const base64Audio = reader.result.split(',')[1];
-                    window.parent.postMessage({type: 'AUDIO_DATA', data: base64Audio}, '*');
-                };
-            };
-
-            this.mediaRecorder.start();
-        },
+    try:
+        response = client.audio.speech.create(
+            model="tts-1",
+            voice="onyx",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
+            input=text,
+            speed=1.0
+        )
+
+        return response.content
+
+    except Exception as e:
+        st.error(f"Error generating speech: {str(e)}")
+        return None
+
+def process_audio(audio_bytes):
+    """Process recorded audio through the full pipeline."""
+    if not audio_bytes:
+        return None, None, None
+
+    # Transcribe audio
+    with st.spinner("🎯 Transcribing audio..."):
+        user_text = transcribe_audio(audio_bytes)
+
+    if not user_text:
+        return None, None, None
+
+    # Get AI response
+    with st.spinner("🤖 Generating response..."):
+        ai_response = get_ai_response(user_text, st.session_state.context)
+
+    # Convert to speech
+    with st.spinner("🔊 Converting to speech..."):
+        speech_audio = text_to_speech(ai_response)
+
+    return user_text, ai_response, speech_audio
+
+def main():
+    st.title("🎙️ Voice Bot")
+    st.markdown("*Talk to Prakhar using your voice!*")
+
+    # Initialize session state
+    init_session_state()
+
+    # Create main layout
+    col1, col2 = st.columns([1, 1], gap="large")
+
+    with col1:
+        st.subheader("🎤 Voice Input")
+
+        # Audio recorder
+        audio_bytes = audio_recorder(
+            text="Click to record",
+            recording_color="#e74c3c",
+            neutral_color="#34495e",
+            icon_name="microphone",
+            icon_size="2x",
+            pause_threshold=2.0,
+            sample_rate=44100
+        )
+
+        # Process audio when new recording is available
+        if audio_bytes and not st.session_state.processing:
+            st.session_state.processing = True
+
+            user_text, ai_response, speech_audio = process_audio(audio_bytes)
+
+            if user_text and ai_response:
+                # Add to conversation history
+                st.session_state.conversation_history.append({
+                    "user": user_text,
+                    "ai": ai_response,
+                    "speech": speech_audio
+                })
+
+            st.session_state.processing = False
+
+        # Show current recording
+        if audio_bytes:
+            st.audio(audio_bytes, format="audio/wav")
+
+    with col2:
+        st.subheader("💬 Conversation")
+
+        # Display conversation history
+        if st.session_state.conversation_history:
+            # Show the most recent conversation
+            latest = st.session_state.conversation_history[-1]
+
+            st.markdown("**You said:**")
+            st.info(latest["user"])
+
+            st.markdown("**Prakhar replied:**")
+            st.success(latest["ai"])
+
+            # Play AI response audio
+            if latest["speech"]:
+                st.audio(latest["speech"], format="audio/mp3")
+
+            # Show conversation history
+            if len(st.session_state.conversation_history) > 1:
+                with st.expander("📜 Previous conversations"):
+                    for i, conv in enumerate(reversed(st.session_state.conversation_history[:-1])):
+                        st.markdown(f"**Conversation {len(st.session_state.conversation_history) - i - 1}:**")
+                        st.markdown(f"👤 You: {conv['user']}")
+                        st.markdown(f"🤖 Prakhar: {conv['ai']}")
+                        if conv["speech"]:
+                            st.audio(conv["speech"], format="audio/mp3")
+                        st.divider()
+        else:
+            st.info("👆 Start by recording your voice message above!")
+
+    # Context management section
+    st.divider()
+
+    with st.expander("⚙️ Manage Context", expanded=False):
+        st.markdown("**Current Context:**")
+
+        # Editable context
+        new_context = st.text_area(
+            "Edit Prakhar's context:",
+            value=st.session_state.context,
+            height=200,
+            help="This context defines who Prakhar is and how he should respond."
+        )
+
+        col1, col2, col3 = st.columns([1, 1, 2])
+
+        with col1:
+            if st.button("💾 Save Context"):
+                if save_context(new_context):
+                    st.session_state.context = new_context
+                    st.success("Context saved!")
+                else:
+                    st.error("Failed to save context")
+
+        with col2:
+            if st.button("🔄 Reset Context"):
+                default_context = """I am Prakhar, an AI assistant. I can help you with general questions and conversations.
+I aim to be helpful, harmless, and honest in all my interactions."""
+                st.session_state.context = default_context
+                save_context(default_context)
+                st.rerun()
+
+        with col3:
+            if st.button("🗑️ Clear Conversation"):
+                st.session_state.conversation_history = []
+                st.rerun()
+
+    # Status indicators
+    if st.session_state.processing:
+        st.info("🔄 Processing your request...")

 if __name__ == "__main__":
-    main()
+    main()
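The deleted half of this diff removes server-side microphone capture. Most of those lines were lost in extraction, but the dropped imports (sounddevice, scipy.io.wavfile) and constants (SAMPLE_RATE, RECORD_DURATION) suggest a flow roughly like the sketch below; this is a hypothetical reconstruction for context, not the original code:

# Hypothetical reconstruction of the removed recording path, inferred from
# the deleted imports and constants; record_audio is an assumed name.
import sounddevice as sd
import scipy.io.wavfile

SAMPLE_RATE = 44100      # matches the deleted constant
RECORD_DURATION = 5      # seconds, matches the deleted constant

def record_audio():
    # sd.rec captures from the machine running the script, i.e. the server.
    # A hosted Space has no server microphone, which is the likely reason
    # this commit moves recording into the visitor's browser.
    frames = sd.rec(int(RECORD_DURATION * SAMPLE_RATE),
                    samplerate=SAMPLE_RATE, channels=1)
    sd.wait()  # block until the recording finishes
    scipy.io.wavfile.write("temp_audio.wav", SAMPLE_RATE, frames)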
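Its replacement is the audio-recorder-streamlit component, which records in the browser and hands WAV bytes back to Python. A minimal usage sketch, reusing the same parameters the new main() passes:

import streamlit as st
from audio_recorder_streamlit import audio_recorder  # pip install audio-recorder-streamlit

# Renders a microphone button; returns WAV bytes once the speaker pauses
# for pause_threshold seconds, or None while nothing has been recorded.
audio_bytes = audio_recorder(text="Click to record",
                             pause_threshold=2.0, sample_rate=44100)
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")  # the same bytes feed transcribe_audio()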
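The three OpenAI calls in the new file can also be exercised outside Streamlit as a quick sanity check. A minimal sketch, assuming OPENAI_API_KEY is exported and a sample.wav recording exists; the model and voice names mirror the diff:

import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Speech -> text, text -> reply, reply -> speech: the same pipeline as
# transcribe_audio(), get_ai_response(), and text_to_speech() above.
with open("sample.wav", "rb") as f:
    text = client.audio.transcriptions.create(
        model="whisper-1", file=f, language="en").text

reply = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": text}],
    max_tokens=500,
).choices[0].message.content

speech = client.audio.speech.create(model="tts-1", voice="onyx", input=reply)
with open("reply.mp3", "wb") as f:
    f.write(speech.content)  # MP3 bytes, the same payload st.audio plays in the app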