Daaku-C5 committed
Commit 72ad8d0 · verified · 1 Parent(s): 30948ea

Update src/streamlit_app.py

Files changed (1):
  src/streamlit_app.py (+252 -177)
src/streamlit_app.py CHANGED
@@ -1,217 +1,292 @@
  import streamlit as st
  from openai import OpenAI
- import sounddevice as sd
- import scipy.io.wavfile
  import io
  import base64
  import os
- import time

-
- st.set_page_config(page_title="Voice Bot", layout="wide")

  # Configuration
- SAMPLE_RATE = 44100
- RECORD_DURATION = 5
  TEMP_AUDIO_FILE = "temp_audio.wav"

  # Initialize OpenAI client
- api_key = os.environ.get("openai")
- client = OpenAI(api_key=api_key)

- # Initialize session state variables if they don't exist
- if 'recorded_audio' not in st.session_state:
-     st.session_state.recorded_audio = None
- if 'user_text' not in st.session_state:
-     st.session_state.user_text = None
- if 'ai_reply' not in st.session_state:
-     st.session_state.ai_reply = None

  def load_context():
-     """Load the context from file."""
      try:
          base_dir = os.path.dirname(os.path.abspath(__file__))
-
          context_path = os.path.join(base_dir, 'context.txt')

-         with open(context_path, "r") as f:
-             return f.read()

-     except FileNotFoundError:
-         st.error("Context file not found!")
-         return ""

- def transcribe_audio(audio_buffer):
      """Transcribe audio using Whisper API."""
-     with open(TEMP_AUDIO_FILE, "wb") as f:
-         f.write(audio_buffer.getvalue())
-
-     with open(TEMP_AUDIO_FILE, "rb") as audio_file:
-         transcript = client.audio.transcriptions.create(
-             model="whisper-1",
-             file=audio_file
-         )
-     return transcript.text

  def get_ai_response(user_text, context):
      """Get AI response using GPT-4."""
-     system_prompt = f"""
-     You are Prakhar.
-     You must respond **only using the following context**:

-     {context}

-     If the user's question cannot be answered using this context, respond with:
-     "I'm not sure about that based on what I know."
-     """
-
-     response = client.chat.completions.create(
-         model="gpt-4",
-         messages=[
-             {"role": "system", "content": system_prompt},
-             {"role": "user", "content": user_text}
-         ]
-     )
-     return response.choices[0].message.content

  def text_to_speech(text):
      """Convert text to speech using OpenAI TTS."""
-     speech = client.audio.speech.create(
-         model="tts-1",
-         voice="onyx",
-         input=text
-     )
-     return base64.b64encode(speech.content).decode()
-
- def handle_record_button():
-     """Handle recording button click"""
-     st.session_state.processing = True
-     info_placeholder = st.empty()
-     info_placeholder.info("Recording...")
-     audio_buffer = record_audio()
-     info_placeholder.empty()
-     st.session_state.recorded_audio = audio_buffer
-
- def handle_recorded_audio(audio_bytes):
-     """Handle the recorded audio data from browser"""
-     audio_buffer = io.BytesIO(base64.b64decode(audio_bytes))
-     st.session_state.recorded_audio = audio_buffer
-     st.session_state.processing = True

- def main():
-     st.title("Voice Bot")

-     if 'context' not in st.session_state:
-         st.session_state.context = load_context()
-     if 'processing' not in st.session_state:
-         st.session_state.processing = False
-
-     with st.container():
-
-         audio, script = st.columns(2, border=True)

-         with audio:
-             st.subheader("Audio Input")
-             # Replace button with HTML/JS audio recorder
-             st.components.v1.html(get_audio_recorder_html(), height=100)

-             # Handle audio data from JavaScript
-             if st.session_state.get('browser_audio'):
-                 handle_recorded_audio(st.session_state.browser_audio)
-                 st.session_state.browser_audio = None

-             # Create placeholder for processing status
-             process_placeholder = st.empty()

-             # Handle processing if recording just completed
-             if st.session_state.processing:
-                 with process_placeholder.container():
-                     with st.spinner("Processing..."):
-                         st.session_state.user_text = transcribe_audio(st.session_state.recorded_audio)
-                         st.session_state.ai_reply = get_ai_response(st.session_state.user_text, st.session_state.context)
-                         audio_b64 = text_to_speech(st.session_state.ai_reply)
-                         st.session_state.ai_audio = audio_b64
-                         st.session_state.processing = False

-             # Display recorded audio if exists
-             if st.session_state.recorded_audio is not None:
-                 st.audio(st.session_state.recorded_audio, format="audio/wav")
-                 if hasattr(st.session_state, 'ai_audio'):
-                     st.audio(f"data:audio/mp3;base64,{st.session_state.ai_audio}", format="audio/mp3")
-
-         with script:
-             st.subheader("Conversation")
-             if st.session_state.user_text is not None:
-                 st.markdown("**You said:**")
-                 st.markdown(f"{st.session_state.user_text}")
-                 st.markdown("**AI Response:**")
-                 st.markdown(f"{st.session_state.ai_reply}")

-     st.divider()
-
-     with st.container(border=True):
-         st.text_area("Context", value=st.session_state.context, height=270, disabled=False)
-         st.markdown("You can update the context in the `context.txt` file.")
-
- # Add JavaScript for audio recording
- def get_audio_recorder_html():
-     return """
-     <script>
-     const audioRecorder = {
-         start: async function() {
-             this.mediaRecorder = null;
-             this.audioChunks = [];
-
-             const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-             this.mediaRecorder = new MediaRecorder(stream);
-
-             this.mediaRecorder.ondataavailable = (e) => {
-                 if (e.data.size > 0) this.audioChunks.push(e.data);
-             };
-
-             this.mediaRecorder.onstop = () => {
-                 const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
-                 const reader = new FileReader();
-                 reader.readAsDataURL(audioBlob);
-                 reader.onloadend = () => {
-                     const base64Audio = reader.result.split(',')[1];
-                     window.parent.postMessage({type: 'AUDIO_DATA', data: base64Audio}, '*');
-                 };
-             };
-
-             this.mediaRecorder.start();
-         },

-         stop: function() {
-             if (this.mediaRecorder) {
-                 this.mediaRecorder.stop();
-                 this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
-             }
-         }
-     };
-
-     const startButton = document.getElementById('recordButton');
-     startButton.addEventListener('mousedown', () => audioRecorder.start());
-     startButton.addEventListener('mouseup', () => audioRecorder.stop());
-     </script>
-     <button id="recordButton" style="padding: 20px 40px; font-size: 16px;">🎙️ Hold to Record</button>
-     """
-
- # Add JavaScript message handler
- js = """
- <script>
- window.addEventListener('message', function(e) {
-     if (e.data.type === 'AUDIO_DATA') {
-         window.parent.streamlit.setComponentValue({
-             'browser_audio': e.data.data
-         });
-     }
- }, false);
- </script>
- """
-
- st.components.v1.html(js, height=0)

  if __name__ == "__main__":
-     main()
-
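Why the removed version above could not work: `st.components.v1.html` embeds static HTML with no return channel, so the recorder's `postMessage` / `window.parent.streamlit.setComponentValue` handoff never reaches `st.session_state.browser_audio` (that API only exists for bidirectional custom components), and `handle_record_button` still calls a `record_audio()` helper that disappeared with the `sounddevice` imports. A quick sketch of the missing channel, assuming only `streamlit` is installed:

import streamlit as st

# Static HTML components render in an iframe but cannot send a value
# back to Python: html() has no return value, unlike components built
# with components.declare_component().
result = st.components.v1.html("<button>🎙️ Hold to Record</button>", height=100)
st.write(result)  # always None — browser_audio can never arrive this way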
 
  import streamlit as st
  from openai import OpenAI
  import io
  import base64
  import os
+ import tempfile
+ from audio_recorder_streamlit import audio_recorder

+ # Page configuration
+ st.set_page_config(
+     page_title="Voice Bot",
+     layout="wide",
+     initial_sidebar_state="collapsed"
+ )

  # Configuration
  TEMP_AUDIO_FILE = "temp_audio.wav"

  # Initialize OpenAI client
+ @st.cache_resource
+ def init_openai_client():
+     try:
+         # Try to get API key from Streamlit secrets first (for HF Spaces)
+         api_key = st.secrets.get("OPENAI_API_KEY", None)
+         if not api_key:
+             # Fallback to environment variable
+             api_key = os.environ.get("OPENAI_API_KEY")
+
+         if not api_key:
+             st.error("⚠️ OpenAI API key not found. Please add OPENAI_API_KEY to your Hugging Face Spaces secrets.")
+             st.info("Go to Settings → Repository secrets → Add OPENAI_API_KEY")
+             st.stop()
+
+         return OpenAI(api_key=api_key)
+     except Exception as e:
+         st.error(f"Error initializing OpenAI client: {str(e)}")
+         st.stop()

+ client = init_openai_client()
+
+ # Initialize session state variables
+ def init_session_state():
+     if 'conversation_history' not in st.session_state:
+         st.session_state.conversation_history = []
+     if 'context' not in st.session_state:
+         st.session_state.context = load_context()
+     if 'processing' not in st.session_state:
+         st.session_state.processing = False

  def load_context():
+     """Load the context from file or return default."""
      try:
          base_dir = os.path.dirname(os.path.abspath(__file__))
          context_path = os.path.join(base_dir, 'context.txt')

+         if os.path.exists(context_path):
+             with open(context_path, "r", encoding='utf-8') as f:
+                 return f.read().strip()
+         else:
+             # Default context if file doesn't exist
+             return """I am Prakhar. I can help you with general questions and conversations.
+ I aim to be helpful, harmless, and honest in all my interactions."""

+     except Exception as e:
+         st.error(f"Error loading context: {str(e)}")
+         return "I am Prakhar, an AI assistant."
+
+ def save_context(context_text):
+     """Save context to file."""
+     try:
+         base_dir = os.path.dirname(os.path.abspath(__file__))
+         context_path = os.path.join(base_dir, 'context.txt')
+
+         with open(context_path, "w", encoding='utf-8') as f:
+             f.write(context_text)
+         return True
+     except Exception as e:
+         st.error(f"Error saving context: {str(e)}")
+         return False

+ def transcribe_audio(audio_bytes):
      """Transcribe audio using Whisper API."""
+     try:
+         # Create a temporary file
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+             tmp_file.write(audio_bytes)
+             tmp_file_path = tmp_file.name
+
+         # Transcribe using OpenAI Whisper
+         with open(tmp_file_path, "rb") as audio_file:
+             transcript = client.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio_file,
+                 language="en"
+             )
+
+         # Clean up temporary file
+         os.unlink(tmp_file_path)
+
+         return transcript.text.strip()
+
+     except Exception as e:
+         st.error(f"Error transcribing audio: {str(e)}")
+         return None

  def get_ai_response(user_text, context):
      """Get AI response using GPT-4."""
+     try:
+         system_prompt = f"""You are Prakhar. You should respond naturally and helpfully.

+ Context about you:
+ {context}

+ Instructions:
+ - Use the context above to inform your responses
+ - If asked about something not covered in the context, you can use your general knowledge
+ - If you're not sure about something specific to your context, say "I'm not sure about that based on what I know about myself"
+ - Keep responses conversational and natural
+ - Be helpful and engaging"""
+
+         response = client.chat.completions.create(
+             model="gpt-4",
+             messages=[
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": user_text}
+             ],
+             max_tokens=500,
+             temperature=0.7
+         )
+
+         return response.choices[0].message.content.strip()
+
+     except Exception as e:
+         st.error(f"Error getting AI response: {str(e)}")
+         return "I'm sorry, I encountered an error while processing your request."

  def text_to_speech(text):
      """Convert text to speech using OpenAI TTS."""
+     try:
+         response = client.audio.speech.create(
+             model="tts-1",
+             voice="onyx",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
+             input=text,
+             speed=1.0
+         )
+
+         return response.content
+
+     except Exception as e:
+         st.error(f"Error generating speech: {str(e)}")
+         return None

+ def process_audio(audio_bytes):
+     """Process recorded audio through the full pipeline."""
+     if not audio_bytes:
+         return None, None, None

+     # Transcribe audio
+     with st.spinner("🎯 Transcribing audio..."):
+         user_text = transcribe_audio(audio_bytes)
+
+     if not user_text:
+         return None, None, None
+
+     # Get AI response
+     with st.spinner("🤖 Generating response..."):
+         ai_response = get_ai_response(user_text, st.session_state.context)
+
+     # Convert to speech
+     with st.spinner("🔊 Converting to speech..."):
+         speech_audio = text_to_speech(ai_response)
+
+     return user_text, ai_response, speech_audio

+ def main():
+     st.title("🎙️ Voice Bot")
+     st.markdown("*Talk to Prakhar using your voice!*")
+
+     # Initialize session state
+     init_session_state()
+
+     # Create main layout
+     col1, col2 = st.columns([1, 1], gap="large")
+
+     with col1:
+         st.subheader("🎤 Voice Input")
+
+         # Audio recorder
+         audio_bytes = audio_recorder(
+             text="Click to record",
+             recording_color="#e74c3c",
+             neutral_color="#34495e",
+             icon_name="microphone",
+             icon_size="2x",
+             pause_threshold=2.0,
+             sample_rate=44100
+         )
+
+         # Process audio when new recording is available
+         if audio_bytes and not st.session_state.processing:
+             st.session_state.processing = True

+             user_text, ai_response, speech_audio = process_audio(audio_bytes)

+             if user_text and ai_response:
+                 # Add to conversation history
+                 st.session_state.conversation_history.append({
+                     "user": user_text,
+                     "ai": ai_response,
+                     "speech": speech_audio
+                 })
+
+             st.session_state.processing = False
+
+         # Show current recording
+         if audio_bytes:
+             st.audio(audio_bytes, format="audio/wav")
+
+     with col2:
+         st.subheader("💬 Conversation")
+
+         # Display conversation history
+         if st.session_state.conversation_history:
+             # Show the most recent conversation
+             latest = st.session_state.conversation_history[-1]

+             st.markdown("**You said:**")
+             st.info(latest["user"])

+             st.markdown("**Prakhar replied:**")
+             st.success(latest["ai"])

+             # Play AI response audio
+             if latest["speech"]:
+                 st.audio(latest["speech"], format="audio/mp3")

+             # Show conversation history
+             if len(st.session_state.conversation_history) > 1:
+                 with st.expander("📜 Previous conversations"):
+                     for i, conv in enumerate(reversed(st.session_state.conversation_history[:-1])):
+                         st.markdown(f"**Conversation {len(st.session_state.conversation_history) - i - 1}:**")
+                         st.markdown(f"👀 You: {conv['user']}")
+                         st.markdown(f"🤖 Prakhar: {conv['ai']}")
+                         if conv["speech"]:
+                             st.audio(conv["speech"], format="audio/mp3")
+                         st.divider()
+         else:
+             st.info("👋 Start by recording your voice message above!")
+
+     # Context management section
+     st.divider()
+
+     with st.expander("⚙️ Manage Context", expanded=False):
+         st.markdown("**Current Context:**")
+
+         # Editable context
+         new_context = st.text_area(
+             "Edit Prakhar's context:",
+             value=st.session_state.context,
+             height=200,
+             help="This context defines who Prakhar is and how he should respond."
+         )
+
+         col1, col2, col3 = st.columns([1, 1, 2])
+
+         with col1:
+             if st.button("💾 Save Context"):
+                 if save_context(new_context):
+                     st.session_state.context = new_context
+                     st.success("Context saved!")
+                 else:
+                     st.error("Failed to save context")
+
+         with col2:
+             if st.button("🔄 Reset Context"):
+                 default_context = """I am Prakhar, an AI assistant. I can help you with general questions and conversations.
+ I aim to be helpful, harmless, and honest in all my interactions."""
+                 st.session_state.context = default_context
+                 save_context(default_context)
+                 st.rerun()
+
+         with col3:
+             if st.button("🗑️ Clear Conversation"):
+                 st.session_state.conversation_history = []
+                 st.rerun()
+
+     # Status indicators
+     if st.session_state.processing:
+         st.info("🔄 Processing your request...")

  if __name__ == "__main__":
+     main()
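The new version wires the recorder straight into Python: `audio_recorder()` returns the WAV bytes of the last recording (or None), which then flow through Whisper, GPT-4, and TTS. A minimal sketch of that round trip with the UI chrome stripped away — it assumes the same dependencies the commit imports (`streamlit`, `openai`, and `audio-recorder-streamlit`, which the Space's requirements must now include) plus an `OPENAI_API_KEY` in the environment; `reply_to_voice` is an illustrative name, not part of the commit:

import os
import tempfile

import streamlit as st
from audio_recorder_streamlit import audio_recorder
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def reply_to_voice(wav_bytes):
    """One round trip: recorded speech -> text -> GPT-4 -> spoken reply."""
    # Whisper wants a named file so it can detect the container format,
    # hence the temporary .wav on disk (the same trick the commit uses).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(wav_bytes)
        path = tmp.name
    try:
        with open(path, "rb") as f:
            text = client.audio.transcriptions.create(model="whisper-1", file=f).text
    finally:
        os.unlink(path)

    reply = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": text}],
    ).choices[0].message.content

    speech = client.audio.speech.create(model="tts-1", voice="onyx", input=reply)
    return text, reply, speech.content  # tts-1 returns MP3 bytes by default

wav_bytes = audio_recorder()  # renders the mic widget; returns WAV bytes or None
if wav_bytes:
    user_text, reply, mp3_bytes = reply_to_voice(wav_bytes)
    st.markdown(f"**You said:** {user_text}")
    st.markdown(f"**Reply:** {reply}")
    st.audio(mp3_bytes, format="audio/mp3")

Two wrinkles the commit keeps: widgets re-return their last value on every rerun, and `st.session_state.processing` is set back to False within the same script run, so the same clip may be reprocessed on the next rerun (remembering the last-processed bytes in session state is a common guard). And `TEMP_AUDIO_FILE` survives at the top of the new file but is no longer used, since transcription now goes through `tempfile`.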