Update src/streamlit_app.py
src/streamlit_app.py (CHANGED: +252, -177)
@@ -1,217 +1,292 @@
 import streamlit as st
 from openai import OpenAI
-import sounddevice as sd
-import scipy.io.wavfile
 import io
 import base64
 import os
-import
+import tempfile
+from audio_recorder_streamlit import audio_recorder

-st.set_page_config(
+# Page configuration
+st.set_page_config(
+    page_title="Voice Bot",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)

 # Configuration
-SAMPLE_RATE = 44100
-RECORD_DURATION = 5
 TEMP_AUDIO_FILE = "temp_audio.wav"

 # Initialize OpenAI client
-st.session_state
-st.session_state
+@st.cache_resource
+def init_openai_client():
+    try:
+        # Try to get API key from Streamlit secrets first (for HF Spaces)
+        api_key = st.secrets.get("OPENAI_API_KEY", None)
+        if not api_key:
+            # Fallback to environment variable
+            api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not api_key:
+            st.error("⚠️ OpenAI API key not found. Please add OPENAI_API_KEY to your Hugging Face Spaces secrets.")
+            st.info("Go to Settings → Repository secrets → Add OPENAI_API_KEY")
+            st.stop()
+
+        return OpenAI(api_key=api_key)
+    except Exception as e:
+        st.error(f"Error initializing OpenAI client: {str(e)}")
+        st.stop()
+
+client = init_openai_client()
+
+# Initialize session state variables
+def init_session_state():
+    if 'conversation_history' not in st.session_state:
+        st.session_state.conversation_history = []
+    if 'context' not in st.session_state:
+        st.session_state.context = load_context()
+    if 'processing' not in st.session_state:
+        st.session_state.processing = False

 def load_context():
-    """Load the context from file."""
+    """Load the context from file or return default."""
     try:
         base_dir = os.path.dirname(os.path.abspath(__file__))
         context_path = os.path.join(base_dir, 'context.txt')

-    except
-        st.error("
-        return ""
+        if os.path.exists(context_path):
+            with open(context_path, "r", encoding='utf-8') as f:
+                return f.read().strip()
+        else:
+            # Default context if file doesn't exist
+            return """I am Prakhar. I can help you with general questions and conversations.
+I aim to be helpful, harmless, and honest in all my interactions."""
+
+    except Exception as e:
+        st.error(f"Error loading context: {str(e)}")
+        return "I am Prakhar, an AI assistant."
+
+def save_context(context_text):
+    """Save context to file."""
+    try:
+        base_dir = os.path.dirname(os.path.abspath(__file__))
+        context_path = os.path.join(base_dir, 'context.txt')
+
+        with open(context_path, "w", encoding='utf-8') as f:
+            f.write(context_text)
+        return True
+    except Exception as e:
+        st.error(f"Error saving context: {str(e)}")
+        return False

-def transcribe_audio(
+def transcribe_audio(audio_bytes):
     """Transcribe audio using Whisper API."""
-    )
+    try:
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+            tmp_file.write(audio_bytes)
+            tmp_file_path = tmp_file.name
+
+        # Transcribe using OpenAI Whisper
+        with open(tmp_file_path, "rb") as audio_file:
+            transcript = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_file,
+                language="en"
+            )
+
+        # Clean up temporary file
+        os.unlink(tmp_file_path)
+
+        return transcript.text.strip()
+
+    except Exception as e:
+        st.error(f"Error transcribing audio: {str(e)}")
+        return None

 def get_ai_response(user_text, context):
     """Get AI response using GPT-4."""
-You must respond **only using the following context**:
+    try:
+        system_prompt = f"""You are Prakhar. You should respond naturally and helpfully.
+
+Context about you:
+{context}
+
+Instructions:
+- Use the context above to inform your responses
+- If asked about something not covered in the context, you can use your general knowledge
+- If you're not sure about something specific to your context, say "I'm not sure about that based on what I know about myself"
+- Keep responses conversational and natural
+- Be helpful and engaging"""
+
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_text}
+            ],
+            max_tokens=500,
+            temperature=0.7
+        )
+
+        return response.choices[0].message.content.strip()
+
+    except Exception as e:
+        st.error(f"Error getting AI response: {str(e)}")
+        return "I'm sorry, I encountered an error while processing your request."

 def text_to_speech(text):
     """Convert text to speech using OpenAI TTS."""
-        info_placeholder.empty()
-        st.session_state.recorded_audio = audio_buffer
-
-def handle_recorded_audio(audio_bytes):
-    """Handle the recorded audio data from browser"""
-    audio_buffer = io.BytesIO(base64.b64decode(audio_bytes))
-    st.session_state.recorded_audio = audio_buffer
-    st.session_state.processing = True
-
-def
-    if st.session_state.get('browser_audio'):
-        handle_recorded_audio(st.session_state.browser_audio)
-        st.session_state.browser_audio = None
-
-    with process_placeholder.container():
-        with st.spinner("Processing..."):
-            st.session_state.user_text = transcribe_audio(st.session_state.recorded_audio)
-            st.session_state.ai_reply = get_ai_response(st.session_state.user_text, st.session_state.context)
-            audio_b64 = text_to_speech(st.session_state.ai_reply)
-            st.session_state.ai_audio = audio_b64
-            st.session_state.processing = False
-
-    st.audio(st.session_state.recorded_audio, format="audio/wav")
-    if hasattr(st.session_state, 'ai_audio'):
-        st.audio(f"data:audio/mp3;base64,{st.session_state.ai_audio}", format="audio/mp3")
-
-    with script:
-        st.subheader("Conversation")
-        if st.session_state.user_text is not None:
-            st.markdown("**You said:**")
-            st.markdown(f"{st.session_state.user_text}")
-            st.markdown("**AI Response:**")
-            st.markdown(f"{st.session_state.ai_reply}")
-
-    st.text_area("Context", value=st.session_state.context, height=270, disabled=False)
-    st.markdown("You can update the context in the `context.txt` file.")
-
-# Add JavaScript for audio recording
-def get_audio_recorder_html():
-    return """
-    <script>
-    const audioRecorder = {
-        start: async function() {
-            this.mediaRecorder = null;
-            this.audioChunks = [];
-
-            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-            this.mediaRecorder = new MediaRecorder(stream);
-
-            this.mediaRecorder.ondataavailable = (e) => {
-                if (e.data.size > 0) this.audioChunks.push(e.data);
-            };
-
-            this.mediaRecorder.onstop = () => {
-                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
-                const reader = new FileReader();
-                reader.readAsDataURL(audioBlob);
-                reader.onloadend = () => {
-                    const base64Audio = reader.result.split(',')[1];
-                    window.parent.postMessage({type: 'AUDIO_DATA', data: base64Audio}, '*');
-                };
-            };
-
-            this.mediaRecorder.start();
-        },
+    try:
+        response = client.audio.speech.create(
+            model="tts-1",
+            voice="onyx",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
+            input=text,
+            speed=1.0
+        )
+
+        return response.content
+
+    except Exception as e:
+        st.error(f"Error generating speech: {str(e)}")
+        return None
+
+def process_audio(audio_bytes):
+    """Process recorded audio through the full pipeline."""
+    if not audio_bytes:
+        return None, None, None
+
+    # Transcribe audio
+    with st.spinner("🎯 Transcribing audio..."):
+        user_text = transcribe_audio(audio_bytes)
+
+    if not user_text:
+        return None, None, None
+
+    # Get AI response
+    with st.spinner("🤖 Generating response..."):
+        ai_response = get_ai_response(user_text, st.session_state.context)
+
+    # Convert to speech
+    with st.spinner("🔊 Converting to speech..."):
+        speech_audio = text_to_speech(ai_response)
+
+    return user_text, ai_response, speech_audio
+
+def main():
+    st.title("🎙️ Voice Bot")
+    st.markdown("*Talk to Prakhar using your voice!*")
+
+    # Initialize session state
+    init_session_state()
+
+    # Create main layout
+    col1, col2 = st.columns([1, 1], gap="large")
+
+    with col1:
+        st.subheader("🎤 Voice Input")
+
+        # Audio recorder
+        audio_bytes = audio_recorder(
+            text="Click to record",
+            recording_color="#e74c3c",
+            neutral_color="#34495e",
+            icon_name="microphone",
+            icon_size="2x",
+            pause_threshold=2.0,
+            sample_rate=44100
+        )
+
+        # Process audio when new recording is available
+        if audio_bytes and not st.session_state.processing:
+            st.session_state.processing = True
+
+            user_text, ai_response, speech_audio = process_audio(audio_bytes)
+
+            if user_text and ai_response:
+                # Add to conversation history
+                st.session_state.conversation_history.append({
+                    "user": user_text,
+                    "ai": ai_response,
+                    "speech": speech_audio
+                })
+
+            st.session_state.processing = False
+
+        # Show current recording
+        if audio_bytes:
+            st.audio(audio_bytes, format="audio/wav")
+
+    with col2:
+        st.subheader("💬 Conversation")
+
+        # Display conversation history
+        if st.session_state.conversation_history:
+            # Show the most recent conversation
+            latest = st.session_state.conversation_history[-1]
+
+            st.markdown("**You said:**")
+            st.info(latest["user"])
+
+            st.markdown("**Prakhar replied:**")
+            st.success(latest["ai"])
+
+            # Play AI response audio
+            if latest["speech"]:
+                st.audio(latest["speech"], format="audio/mp3")
+
+            # Show conversation history
+            if len(st.session_state.conversation_history) > 1:
+                with st.expander("📜 Previous conversations"):
+                    for i, conv in enumerate(reversed(st.session_state.conversation_history[:-1])):
+                        st.markdown(f"**Conversation {len(st.session_state.conversation_history) - i - 1}:**")
+                        st.markdown(f"👤 You: {conv['user']}")
+                        st.markdown(f"🤖 Prakhar: {conv['ai']}")
+                        if conv["speech"]:
+                            st.audio(conv["speech"], format="audio/mp3")
+                        st.divider()
+        else:
+            st.info("👆 Start by recording your voice message above!")
+
+    # Context management section
+    st.divider()
+
+    with st.expander("⚙️ Manage Context", expanded=False):
+        st.markdown("**Current Context:**")
+
+        # Editable context
+        new_context = st.text_area(
+            "Edit Prakhar's context:",
+            value=st.session_state.context,
+            height=200,
+            help="This context defines who Prakhar is and how he should respond."
+        )
+
+        col1, col2, col3 = st.columns([1, 1, 2])
+
+        with col1:
+            if st.button("💾 Save Context"):
+                if save_context(new_context):
+                    st.session_state.context = new_context
+                    st.success("Context saved!")
+                else:
+                    st.error("Failed to save context")
+
+        with col2:
+            if st.button("🔄 Reset Context"):
+                default_context = """I am Prakhar, an AI assistant. I can help you with general questions and conversations.
+I aim to be helpful, harmless, and honest in all my interactions."""
+                st.session_state.context = default_context
+                save_context(default_context)
+                st.rerun()
+
+        with col3:
+            if st.button("🗑️ Clear Conversation"):
+                st.session_state.conversation_history = []
+                st.rerun()
+
+    # Status indicators
+    if st.session_state.processing:
+        st.info("🔄 Processing your request...")

 if __name__ == "__main__":
-    main()
+    main()
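The deleted half of this diff removes server-side microphone capture. Most of those lines were lost in extraction, but the dropped imports (sounddevice, scipy.io.wavfile) and constants (SAMPLE_RATE, RECORD_DURATION) suggest a flow roughly like the sketch below; this is a hypothetical reconstruction for context, not the original code:

# Hypothetical reconstruction of the removed recording path, inferred from
# the deleted imports and constants; record_audio is an assumed name.
import sounddevice as sd
import scipy.io.wavfile

SAMPLE_RATE = 44100      # matches the deleted constant
RECORD_DURATION = 5      # seconds, matches the deleted constant

def record_audio():
    # sd.rec captures from the machine running the script, i.e. the server.
    # A hosted Space has no server microphone, which is the likely reason
    # this commit moves recording into the visitor's browser.
    frames = sd.rec(int(RECORD_DURATION * SAMPLE_RATE),
                    samplerate=SAMPLE_RATE, channels=1)
    sd.wait()  # block until the recording finishes
    scipy.io.wavfile.write("temp_audio.wav", SAMPLE_RATE, frames)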
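Its replacement is the audio-recorder-streamlit component, which records in the browser and hands WAV bytes back to Python. A minimal usage sketch, reusing the same parameters the new main() passes:

import streamlit as st
from audio_recorder_streamlit import audio_recorder  # pip install audio-recorder-streamlit

# Renders a microphone button; returns WAV bytes once the speaker pauses
# for pause_threshold seconds, or None while nothing has been recorded.
audio_bytes = audio_recorder(text="Click to record",
                             pause_threshold=2.0, sample_rate=44100)
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")  # the same bytes feed transcribe_audio()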
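The three OpenAI calls in the new file can also be exercised outside Streamlit as a quick sanity check. A minimal sketch, assuming OPENAI_API_KEY is exported and a sample.wav recording exists; the model and voice names mirror the diff:

import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Speech -> text, text -> reply, reply -> speech: the same pipeline as
# transcribe_audio(), get_ai_response(), and text_to_speech() above.
with open("sample.wav", "rb") as f:
    text = client.audio.transcriptions.create(
        model="whisper-1", file=f, language="en").text

reply = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": text}],
    max_tokens=500,
).choices[0].message.content

speech = client.audio.speech.create(model="tts-1", voice="onyx", input=reply)
with open("reply.mp3", "wb") as f:
    f.write(speech.content)  # MP3 bytes, the same payload st.audio plays in the app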