DeepSeekR1-LIVE

Running on Zero

App Files Files Community

ginipick committed on Feb 3

Commit

ad59ac8

verified ·

1 Parent(s): 8366798

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -137

app.py CHANGED Viewed

@@ -1,43 +1,51 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import spaces
-from duckduckgo_search import DDGS
 import time
 import torch
-from datetime import datetime
-import os
-import subprocess
 import numpy as np
-# Install required dependencies for Kokoro with better error handling
 try:
     subprocess.run(['git', 'lfs', 'install'], check=True)
     if not os.path.exists('Kokoro-82M'):
         subprocess.run(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M'], check=True)
-    # Try installing espeak with proper package manager commands
     try:
-        # Update package list first
         subprocess.run(['apt-get', 'update'], check=True)
-        # Try installing espeak first (more widely available)
         subprocess.run(['apt-get', 'install', '-y', 'espeak'], check=True)
     except subprocess.CalledProcessError:
-        print("Warning: Could not install espeak. Attempting espeak-ng...")
         try:
             subprocess.run(['apt-get', 'install', '-y', 'espeak-ng'], check=True)
         except subprocess.CalledProcessError:
             print("Warning: Could not install espeak or espeak-ng. TTS functionality may be limited.")
 except Exception as e:
     print(f"Warning: Initial setup error: {str(e)}")
     print("Continuing with limited functionality...")
-# Initialize models and tokenizers
 model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
-# Move model initialization inside a function to prevent CUDA initialization in main process
 def init_models():
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
@@ -48,27 +56,20 @@ def init_models():
     )
     return model
-# Initialize Kokoro TTS with better error handling
 try:
     import sys
     sys.path.append('Kokoro-82M')
     from models import build_model
     from kokoro import generate
-    # Don't initialize models/voices in main process for ZeroGPU compatibility
-    VOICE_CHOICES = {
-        '🇺🇸 Female (Default)': 'af',
-        '🇺🇸 Bella': 'af_bella',
-        '🇺🇸 Sarah': 'af_sarah',
-        '🇺🇸 Nicole': 'af_nicole'
-    }
     TTS_ENABLED = True
 except Exception as e:
     print(f"Warning: Could not initialize Kokoro TTS: {str(e)}")
     TTS_ENABLED = False
-def get_web_results(query, max_results=5):  # Increased to 5 for better context
-    """Get web search results using DuckDuckGo"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results))
@@ -82,7 +83,6 @@ def get_web_results(query, max_results=5):  # Increased to 5 for better context
         return []
 def format_prompt(query, context):
-    """Format the prompt with web context"""
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     context_lines = '\n'.join([f'- [{res["title"]}]: {res["snippet"]}' for res in context])
     return f"""You are an intelligent search assistant. Answer the user's query using the provided web context.
@@ -99,7 +99,6 @@ Provide a detailed answer in markdown format. Include relevant information from
 Answer:"""
 def format_sources(web_results):
-    """Format sources with more details"""
     if not web_results:
         return "<div class='no-sources'>No sources available</div>"
@@ -120,11 +119,9 @@ def format_sources(web_results):
     sources_html += "</div>"
     return sources_html
-# Wrap the answer generation with spaces.GPU decorator
 @spaces.GPU(duration=30)
 def generate_answer(prompt):
-    """Generate answer using the DeepSeek model"""
-    # Initialize model inside the GPU-decorated function
     model = init_models()
     inputs = tokenizer(
@@ -148,28 +145,21 @@ def generate_answer(prompt):
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Similarly wrap TTS generation with spaces.GPU
 @spaces.GPU(duration=60)
 def generate_speech_with_gpu(text, voice_name='af'):
-    """Generate speech from text using Kokoro TTS model with GPU handling"""
     try:
-        # Initialize TTS model and voice inside GPU function
         device = 'cuda'
         TTS_MODEL = build_model('Kokoro-82M/kokoro-v0_19.pth', device)
         VOICEPACK = torch.load(f'Kokoro-82M/voices/{voice_name}.pt', weights_only=True).to(device)
-        # Clean the text
         clean_text = ' '.join([line for line in text.split('\n') if not line.startswith('#')])
         clean_text = clean_text.replace('[', '').replace(']', '').replace('*', '')
-        # Split long text into chunks
         max_chars = 1000
-        chunks = []
         if len(clean_text) > max_chars:
             sentences = clean_text.split('.')
             current_chunk = ""
             for sentence in sentences:
                 if len(current_chunk) + len(sentence) < max_chars:
                     current_chunk += sentence + "."
@@ -182,21 +172,16 @@ def generate_speech_with_gpu(text, voice_name='af'):
         else:
             chunks = [clean_text]
-        # Generate audio for each chunk
         audio_chunks = []
         for chunk in chunks:
-            if chunk.strip():  # Only process non-empty chunks
                 chunk_audio, _ = generate(TTS_MODEL, chunk.strip(), VOICEPACK, lang='a')
                 if isinstance(chunk_audio, torch.Tensor):
                     chunk_audio = chunk_audio.cpu().numpy()
                 audio_chunks.append(chunk_audio)
-        # Concatenate chunks if we have any
         if audio_chunks:
-            if len(audio_chunks) > 1:
-                final_audio = np.concatenate(audio_chunks)
-            else:
-                final_audio = audio_chunks[0]
             return (24000, final_audio)
         return None
@@ -207,12 +192,10 @@ def generate_speech_with_gpu(text, voice_name='af'):
         return None
 def process_query(query, history, selected_voice='af'):
-    """Process user query with streaming effect"""
     try:
         if history is None:
             history = []
-        # Get web results first
         web_results = get_web_results(query)
         sources_html = format_sources(web_results)
@@ -225,12 +208,10 @@ def process_query(query, history, selected_voice='af'):
             audio_output: None
         }
-        # Generate answer
-        prompt = format_prompt(query, web_results)
-        answer = generate_answer(prompt)
         final_answer = answer.split("Answer:")[-1].strip()
-        # Generate speech from the answer
         if TTS_ENABLED:
             try:
                 yield {
@@ -240,10 +221,7 @@ def process_query(query, history, selected_voice='af'):
                     chat_history_display: history + [[query, final_answer]],
                     audio_output: None
                 }
                 audio = generate_speech_with_gpu(final_answer, selected_voice)
-                if audio is None:
-                    print("Failed to generate audio")
             except Exception as e:
                 print(f"Error in speech generation: {str(e)}")
                 audio = None
@@ -271,89 +249,82 @@ def process_query(query, history, selected_voice='af'):
             audio_output: None
         }
-# Update the CSS for better contrast and readability
 css = """
 .gradio-container {
     max-width: 1200px !important;
-    background-color: #f7f7f8 !important;
 }
 #header {
     text-align: center;
-    margin-bottom: 2rem;
     padding: 2rem 0;
-    background: #1a1b1e;
     border-radius: 12px;
-    color: white;
 }
 #header h1 {
-    color: white;
     font-size: 2.5rem;
     margin-bottom: 0.5rem;
 }
-#header h3 {
-    color: #a8a9ab;
-}
 .search-container {
-    background: #1a1b1e;
     border-radius: 12px;
-    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
-    padding: 1rem;
     margin-bottom: 1rem;
 }
 .search-box {
     padding: 1rem;
-    background: #2c2d30;
     border-radius: 8px;
     margin-bottom: 1rem;
 }
-/* Style the input textbox */
 .search-box input[type="text"] {
-    background: #3a3b3e !important;
-    border: 1px solid #4a4b4e !important;
-    color: white !important;
     border-radius: 8px !important;
 }
 .search-box input[type="text"]::placeholder {
-    color: #a8a9ab !important;
 }
-/* Style the search button */
 .search-box button {
     background: #2563eb !important;
     border: none !important;
 }
-/* Results area styling */
 .results-container {
-    background: #2c2d30;
     border-radius: 8px;
-    padding: 1rem;
     margin-top: 1rem;
 }
 .answer-box {
-    background: #3a3b3e;
     border-radius: 8px;
     padding: 1.5rem;
-    color: white;
     margin-bottom: 1rem;
 }
 .answer-box p {
-    color: #e5e7eb;
     line-height: 1.6;
 }
 .sources-container {
     margin-top: 1rem;
-    background: #2c2d30;
     border-radius: 8px;
     padding: 1rem;
 }
@@ -362,13 +333,13 @@ css = """
     display: flex;
     padding: 12px;
     margin: 8px 0;
-    background: #3a3b3e;
     border-radius: 8px;
     transition: all 0.2s;
 }
 .source-item:hover {
-    background: #4a4b4e;
 }
 .source-number {
@@ -390,13 +361,13 @@ css = """
 }
 .source-date {
-    color: #a8a9ab;
     font-size: 0.9em;
     margin-left: 8px;
 }
 .source-snippet {
-    color: #e5e7eb;
     font-size: 0.9em;
     line-height: 1.4;
 }
@@ -405,63 +376,36 @@ css = """
     max-height: 400px;
     overflow-y: auto;
     padding: 1rem;
-    background: #2c2d30;
-    border-radius: 8px;
-    margin-top: 1rem;
-}
-.examples-container {
-    background: #2c2d30;
     border-radius: 8px;
-    padding: 1rem;
     margin-top: 1rem;
 }
-.examples-container button {
-    background: #3a3b3e !important;
-    border: 1px solid #4a4b4e !important;
-    color: #e5e7eb !important;
-}
-/* Markdown content styling */
-.markdown-content {
-    color: #e5e7eb !important;
-}
-.markdown-content h1, .markdown-content h2, .markdown-content h3 {
-    color: white !important;
-}
-.markdown-content a {
-    color: #60a5fa !important;
-}
-/* Accordion styling */
-.accordion {
-    background: #2c2d30 !important;
-    border-radius: 8px !important;
-    margin-top: 1rem !important;
-}
 .voice-selector {
     margin-top: 1rem;
-    background: #2c2d30;
     border-radius: 8px;
     padding: 0.5rem;
 }
 .voice-selector select {
-    background: #3a3b3e !important;
-    color: white !important;
-    border: 1px solid #4a4b4e !important;
 }
 """
-# Update the Gradio interface layout
-with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
     chat_history = gr.State([])
-    with gr.Column(elem_id="header"):
         gr.Markdown("# 🔍 AI Search Assistant")
         gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
@@ -484,17 +428,16 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
         with gr.Row(elem_classes="results-container"):
             with gr.Column(scale=2):
                 with gr.Column(elem_classes="answer-box"):
-                    answer_output = gr.Markdown(elem_classes="markdown-content")
-                    with gr.Row():
-                        audio_output = gr.Audio(label="Voice Response", elem_classes="audio-player")
-                with gr.Accordion("Chat History", open=False, elem_classes="accordion"):
                     chat_history_display = gr.Chatbot(elem_classes="chat-history")
             with gr.Column(scale=1):
-                with gr.Column(elem_classes="sources-box"):
                     gr.Markdown("### Sources")
                     sources_output = gr.HTML()
-        with gr.Row(elem_classes="examples-container"):
             gr.Examples(
                 examples=[
                     "musk explores blockchain for doge",
@@ -505,15 +448,12 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
                 inputs=search_input,
                 label="Try these examples"
             )
-    # Handle interactions
     search_btn.click(
         fn=process_query,
         inputs=[search_input, chat_history, voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
-    # Also trigger search on Enter key
     search_input.submit(
         fn=process_query,
         inputs=[search_input, chat_history, voice_select],
@@ -521,4 +461,4 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

+import subprocess  # 🥲
+import os
 import time
 import torch
 import numpy as np
+import gradio as gr
+import spaces
+import re
+import json
+from datetime import datetime
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from duckduckgo_search import DDGS
+from pydantic import BaseModel
+# ----------------------- Setup & Dependency Installation ----------------------- #
 try:
     subprocess.run(['git', 'lfs', 'install'], check=True)
     if not os.path.exists('Kokoro-82M'):
         subprocess.run(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M'], check=True)
     try:
         subprocess.run(['apt-get', 'update'], check=True)
         subprocess.run(['apt-get', 'install', '-y', 'espeak'], check=True)
     except subprocess.CalledProcessError:
+        print("Warning: Could not install espeak. Trying espeak-ng...")
         try:
             subprocess.run(['apt-get', 'install', '-y', 'espeak-ng'], check=True)
         except subprocess.CalledProcessError:
             print("Warning: Could not install espeak or espeak-ng. TTS functionality may be limited.")
 except Exception as e:
     print(f"Warning: Initial setup error: {str(e)}")
     print("Continuing with limited functionality...")
+# ----------------------- Global Variables ----------------------- #
+# Define VOICE_CHOICES up front so defaults are available even if TTS is not initialized
+VOICE_CHOICES = {
+    '🇺🇸 Female (Default)': 'af',
+    '🇺🇸 Bella': 'af_bella',
+    '🇺🇸 Sarah': 'af_sarah',
+    '🇺🇸 Nicole': 'af_nicole'
+}
+TTS_ENABLED = False  # Disabled by default in case the initial TTS module import fails
+# ----------------------- Model and Tokenizer Initialization ----------------------- #
 model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
 def init_models():
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
     )
     return model
+# ----------------------- Kokoro TTS Initialization ----------------------- #
 try:
     import sys
     sys.path.append('Kokoro-82M')
     from models import build_model
     from kokoro import generate
     TTS_ENABLED = True
 except Exception as e:
     print(f"Warning: Could not initialize Kokoro TTS: {str(e)}")
     TTS_ENABLED = False
+# ----------------------- Web Search Functions ----------------------- #
+def get_web_results(query, max_results=5):
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results))
         return []
 def format_prompt(query, context):
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     context_lines = '\n'.join([f'- [{res["title"]}]: {res["snippet"]}' for res in context])
     return f"""You are an intelligent search assistant. Answer the user's query using the provided web context.
 Answer:"""
 def format_sources(web_results):
     if not web_results:
         return "<div class='no-sources'>No sources available</div>"
     sources_html += "</div>"
     return sources_html
+# ----------------------- Answer Generation ----------------------- #
 @spaces.GPU(duration=30)
 def generate_answer(prompt):
     model = init_models()
     inputs = tokenizer(
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 @spaces.GPU(duration=60)
 def generate_speech_with_gpu(text, voice_name='af'):
     try:
         device = 'cuda'
         TTS_MODEL = build_model('Kokoro-82M/kokoro-v0_19.pth', device)
         VOICEPACK = torch.load(f'Kokoro-82M/voices/{voice_name}.pt', weights_only=True).to(device)
         clean_text = ' '.join([line for line in text.split('\n') if not line.startswith('#')])
         clean_text = clean_text.replace('[', '').replace(']', '').replace('*', '')
         max_chars = 1000
         if len(clean_text) > max_chars:
             sentences = clean_text.split('.')
+            chunks = []
             current_chunk = ""
             for sentence in sentences:
                 if len(current_chunk) + len(sentence) < max_chars:
                     current_chunk += sentence + "."
         else:
             chunks = [clean_text]
         audio_chunks = []
         for chunk in chunks:
+            if chunk.strip():
                 chunk_audio, _ = generate(TTS_MODEL, chunk.strip(), VOICEPACK, lang='a')
                 if isinstance(chunk_audio, torch.Tensor):
                     chunk_audio = chunk_audio.cpu().numpy()
                 audio_chunks.append(chunk_audio)
         if audio_chunks:
+            final_audio = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
             return (24000, final_audio)
         return None
         return None
 def process_query(query, history, selected_voice='af'):
     try:
         if history is None:
             history = []
         web_results = get_web_results(query)
         sources_html = format_sources(web_results)
             audio_output: None
         }
+        prompt_text = format_prompt(query, web_results)
+        answer = generate_answer(prompt_text)
         final_answer = answer.split("Answer:")[-1].strip()
         if TTS_ENABLED:
             try:
                 yield {
                     chat_history_display: history + [[query, final_answer]],
                     audio_output: None
                 }
                 audio = generate_speech_with_gpu(final_answer, selected_voice)
             except Exception as e:
                 print(f"Error in speech generation: {str(e)}")
                 audio = None
             audio_output: None
         }
+# ----------------------- Custom CSS for Improved UI ----------------------- #
 css = """
 .gradio-container {
     max-width: 1200px !important;
+    background-color: #1e1e1e !important;
+    padding: 20px;
+    border-radius: 12px;
 }
 #header {
     text-align: center;
     padding: 2rem 0;
+    background: #272727;
     border-radius: 12px;
+    color: #ffffff;
+    margin-bottom: 2rem;
 }
 #header h1 {
     font-size: 2.5rem;
     margin-bottom: 0.5rem;
 }
 .search-container {
+    background: #272727;
     border-radius: 12px;
+    padding: 1.5rem;
     margin-bottom: 1rem;
 }
 .search-box {
     padding: 1rem;
+    background: #333333;
     border-radius: 8px;
     margin-bottom: 1rem;
 }
 .search-box input[type="text"] {
+    background: #444444 !important;
+    border: 1px solid #555555 !important;
+    color: #ffffff !important;
     border-radius: 8px !important;
 }
 .search-box input[type="text"]::placeholder {
+    color: #bbbbbb !important;
 }
 .search-box button {
     background: #2563eb !important;
     border: none !important;
 }
 .results-container {
+    background: #2c2c2c;
     border-radius: 8px;
+    padding: 1.5rem;
     margin-top: 1rem;
 }
 .answer-box {
+    background: #3a3a3a;
     border-radius: 8px;
     padding: 1.5rem;
+    color: #ffffff;
     margin-bottom: 1rem;
 }
 .answer-box p {
+    color: #e0e0e0;
     line-height: 1.6;
 }
 .sources-container {
     margin-top: 1rem;
+    background: #2c2c2c;
     border-radius: 8px;
     padding: 1rem;
 }
     display: flex;
     padding: 12px;
     margin: 8px 0;
+    background: #3a3a3a;
     border-radius: 8px;
     transition: all 0.2s;
 }
 .source-item:hover {
+    background: #4a4a4a;
 }
 .source-number {
 }
 .source-date {
+    color: #bbbbbb;
     font-size: 0.9em;
     margin-left: 8px;
 }
 .source-snippet {
+    color: #e0e0e0;
     font-size: 0.9em;
     line-height: 1.4;
 }
     max-height: 400px;
     overflow-y: auto;
     padding: 1rem;
+    background: #2c2c2c;
     border-radius: 8px;
     margin-top: 1rem;
 }
 .voice-selector {
     margin-top: 1rem;
+    background: #333333;
     border-radius: 8px;
     padding: 0.5rem;
 }
 .voice-selector select {
+    background: #444444 !important;
+    color: #ffffff !important;
+    border: 1px solid #555555 !important;
+}
+footer {
+    text-align: center;
+    padding: 1rem 0;
+    font-size: 0.9em;
+    color: #bbbbbb;
 }
 """
+# ----------------------- Gradio Interface ----------------------- #
+with gr.Blocks(title="AI Search Assistant", css=css) as demo:
     chat_history = gr.State([])
+    with gr.Column(id="header"):
         gr.Markdown("# 🔍 AI Search Assistant")
         gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
         with gr.Row(elem_classes="results-container"):
             with gr.Column(scale=2):
                 with gr.Column(elem_classes="answer-box"):
+                    answer_output = gr.Markdown()
+                    audio_output = gr.Audio(label="Voice Response")
+                with gr.Accordion("Chat History", open=False):
                     chat_history_display = gr.Chatbot(elem_classes="chat-history")
             with gr.Column(scale=1):
+                with gr.Column():
                     gr.Markdown("### Sources")
                     sources_output = gr.HTML()
+        with gr.Row():
             gr.Examples(
                 examples=[
                     "musk explores blockchain for doge",
                 inputs=search_input,
                 label="Try these examples"
             )
     search_btn.click(
         fn=process_query,
         inputs=[search_input, chat_history, voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
     search_input.submit(
         fn=process_query,
         inputs=[search_input, chat_history, voice_select],
     )
 if __name__ == "__main__":
+    demo.launch(share=True)