Update app.py
app.py
CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import os
 import requests
-import
+import time
 from datetime import datetime
-from typing import List, Dict
-from session_manager import SessionManager
+from typing import List, Dict
+from session_manager import SessionManager  # only if you need sessions
 
-# Initialize session manager and get HF API key
+# Initialize session manager and get HF API key (adjust if not using sessions)
 session_manager = SessionManager()
 HF_API_KEY = os.getenv("HF_API_KEY")
 
@@ -18,25 +18,27 @@ MODEL_ENDPOINTS = {
 }
 
 def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
-    """
+    """
+    Query a single model with the conversation so far (list of dicts with 'role' and 'content').
+    """
     endpoint = MODEL_ENDPOINTS[model_name]
     headers = {
         "Authorization": f"Bearer {HF_API_KEY}",
         "Content-Type": "application/json"
     }
-
-    #
-    conversation = "\n".join(
-
-    # Model-specific prompt formatting
+
+    # Combine conversation into a single string (simple example)
+    conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+
+    # Model-specific prompt formatting
     model_prompts = {
         "Qwen2.5-72B-Instruct": (
-            f"<|im_start|>system\nCollaborate with other experts
+            f"<|im_start|>system\nCollaborate with other experts:\n{conversation}<|im_end|>\n"
             "<|im_start|>assistant\nMy analysis:"
         ),
         "Llama3.3-70B-Instruct": (
             "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
-            f"Build
+            f"Build on the conversation:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
         ),
         "Qwen2.5-Coder-32B-Instruct": (
@@ -45,7 +47,6 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
         )
     }
 
-    # Model-specific stop sequences
     stop_sequences = {
         "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
         "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
@@ -55,126 +56,132 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
     payload = {
         "inputs": model_prompts[model_name],
         "parameters": {
-            "max_tokens":
+            "max_tokens": 1024,
             "temperature": 0.7,
             "stop_sequences": stop_sequences[model_name],
             "return_full_text": False
         }
     }
-
+
     try:
         response = requests.post(endpoint, json=payload, headers=headers)
         response.raise_for_status()
-
-        # Clean up
-        result = result.strip()  # Remove leading/trailing whitespace
-        return result
+        generated = response.json()[0]["generated_text"]
+        # Clean up possible leftover tokens
+        generated = generated.split("<|")[0].strip()
+        return generated
     except Exception as e:
         return f"{model_name} error: {str(e)}"
 
+
+def on_new_session():
+    """Create a new session and clear the chat."""
+    new_id = session_manager.create_session()
+    return new_id, []
+
+def user_message(user_msg, history, session_id):
+    """
+    After the user hits enter, append the user's message to the conversation.
+    Return updated conversation so the UI can display it.
+    """
+    if not user_msg.strip():
+        return "", history  # if user didn't type anything
+    # Append the new user message to the conversation
+    history.append({"role": "user", "content": user_msg})
+    return "", history
+
+def bot_reply(history, session_id):
+    """
+    Stream the multi-model response. We rely on the *last* user message in `history`,
+    then call each model in turn, appending partial updates. Yields updated conversation each time.
+    """
+    if not history or history[-1]["role"] != "user":
+        return  # There's no new user message to respond to
+
+    # Optionally load existing session, if you have session logic
+    session = session_manager.load_session(session_id) if session_id else None
+    if session is None:
         session = {"history": []}
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "assistant",
-        "model": "Qwen2.5-72B-Instruct",
-        "content": response2
-    })
-    messages.append({"role": "assistant", "content": f"Qwen2.5-72B-Instruct: {response2}"})
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}"
-
-    # Final model
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}\n\n🟡 Llama3.3-70B-Instruct is thinking..."
-    response3 = query_model("Llama3.3-70B-Instruct", messages)
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "assistant",
-        "model": "Llama3.3-70B-Instruct",
-        "content": response3
-    })
-    messages.append({"role": "assistant", "content": f"Llama3.3-70B-Instruct: {response3}"})
-
-    # Save final session state
+
+    # 1) Qwen2.5-Coder-32B
+    # Add an assistant message placeholder
+    history.append({"role": "assistant", "content": "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."})
+    yield history
+
+    resp1 = query_model("Qwen2.5-Coder-32B-Instruct", history)
+    updated_content = f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{resp1}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # 2) Qwen2.5-72B
+    updated_content += "\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
+    history[-1]["content"] = updated_content
+    yield history
+
+    resp2 = query_model("Qwen2.5-72B-Instruct", history)
+    updated_content += f"\n\n🟣 **Qwen2.5-72B-Instruct**\n{resp2}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # 3) Llama3.3-70B
+    updated_content += "\n\n🟡 Llama3.3-70B-Instruct is thinking..."
+    history[-1]["content"] = updated_content
+    yield history
+
+    resp3 = query_model("Llama3.3-70B-Instruct", history)
+    updated_content += f"\n\n🟡 **Llama3.3-70B-Instruct**\n{resp3}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # Save session, if needed
+    session["history"] = history
     session_manager.save_session(session_id, session)
-
-    # Return final combined response
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}\n\n🟡 **Llama3.3-70B-Instruct**\n{response3}"
 
-
+def clear_chat():
+    """
+    Clears the Chatbot entirely (set it to an empty list).
+    """
+    return []
+
+# Build the Gradio Blocks interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Multi-LLM Collaboration Chat")
-
+    gr.Markdown("## Multi-LLM Collaboration Chat (Streaming)")
+
     with gr.Row():
         session_id = gr.State(session_manager.create_session)
-
-        #
+        new_session_btn = gr.Button("🔄 New Session")
+
+    # Chatbot with "type='messages'" for streaming messages and LaTeX delimiters
     chatbot = gr.Chatbot(
-
+        type="messages",
+        height=550,
         latex_delimiters=[
             {"left": "$", "right": "$", "display": False},   # inline math
             {"left": "$$", "right": "$$", "display": True}   # display math
         ]
     )
-
-    msg = gr.Textbox(label="Message")
-
-        yield history
-
-    msg.submit(user, [msg, chatbot, session_id], [msg, chatbot]).then(
-        bot, [chatbot, session_id], [chatbot]
+
+    msg = gr.Textbox(label="Your Message")
+    clear_btn = gr.Button("Clear")
+
+    # Wire up the events:
+    # 1) On user submit:
+    msg.submit(
+        fn=user_message,
+        inputs=[msg, chatbot, session_id],
+        outputs=[msg, chatbot],
+        queue=False
+    ).then(
+        fn=bot_reply,
+        inputs=[chatbot, session_id],
+        outputs=[chatbot]
     )
-
+
+    # 2) On "Clear" click, empty the chat:
+    clear_btn.click(fn=clear_chat, outputs=chatbot, queue=False)
+
+    # 3) On "New Session" click, get a fresh session ID and clear chat:
+    new_session_btn.click(fn=on_new_session, outputs=[session_id, chatbot], queue=False)
 
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch()
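A few notes on the changed code. The new `query_model` parses `response.json()[0]["generated_text"]`, which matches the Hugging Face Inference API's text-generation response shape: a JSON list whose first element holds the generated string. A minimal sketch of that round trip (the endpoint URL below is a placeholder for illustration, not necessarily one of this Space's `MODEL_ENDPOINTS`):

```python
import os
import requests

# Placeholder endpoint; app.py reads the real ones from MODEL_ENDPOINTS.
ENDPOINT = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct"

payload = {
    "inputs": "def fibonacci(n):",
    # Hosted text-generation backends generally expect "max_new_tokens";
    # whether "max_tokens" / "stop_sequences" are honored depends on the backend.
    "parameters": {"max_new_tokens": 64, "return_full_text": False},
}

resp = requests.post(
    ENDPOINT,
    json=payload,
    headers={"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"},
    timeout=60,
)
resp.raise_for_status()

# Expected shape: [{"generated_text": "..."}], hence response.json()[0]["generated_text"].
print(resp.json()[0]["generated_text"])
```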
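One quirk worth flagging: `bot_reply` passes the whole `history` into `query_model`, and at that point the last entry is the "...is thinking..." placeholder, so the placeholder text gets joined into the prompt. A possible tweak (hypothetical, not part of this commit) is to query on the history minus the placeholder:

```python
# Hypothetical variant of the first model call in bot_reply:
# exclude the placeholder assistant message from the prompt.
prompt_messages = history[:-1]
resp1 = query_model("Qwen2.5-Coder-32B-Instruct", prompt_messages)
```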
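`gr.Chatbot(type="messages")` takes the history as a list of `{"role": ..., "content": ...}` dicts, which is why `user_message` appends a user dict and `bot_reply` mutates the last assistant dict and re-yields the list; every `yield` repaints the chat, producing the streaming effect. A self-contained toy version of the same wiring, with a dummy reply standing in for the model calls:

```python
import time
import gradio as gr

with gr.Blocks() as toy:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox(label="Message")

    def add_user(msg, history):
        history.append({"role": "user", "content": msg})
        return "", history

    def stream_reply(history):
        # Placeholder first, then progressively overwrite it, as bot_reply does.
        history.append({"role": "assistant", "content": ""})
        for chunk in ["thinking", " ...", " done"]:
            history[-1]["content"] += chunk
            time.sleep(0.2)  # stands in for a model call
            yield history

    box.submit(add_user, [box, chat], [box, chat], queue=False).then(
        stream_reply, chat, chat
    )

if __name__ == "__main__":
    toy.launch()
```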
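Finally, `session_manager.py` is not part of this commit, so the `SessionManager` interface is only implied by its call sites: `create_session()` (handed to `gr.State` as a factory), `load_session(session_id)`, and `save_session(session_id, session)`. A minimal in-memory stand-in that satisfies those calls, purely illustrative; the real module may persist sessions differently:

```python
import uuid

class SessionManager:
    """In-memory stand-in; the real session_manager.py may persist to disk or a DB."""

    def __init__(self):
        self._sessions: dict[str, dict] = {}

    def create_session(self) -> str:
        session_id = str(uuid.uuid4())
        self._sessions[session_id] = {"history": []}
        return session_id

    def load_session(self, session_id: str):
        # Returns None for unknown IDs, matching the `if session is None` fallback in app.py.
        return self._sessions.get(session_id)

    def save_session(self, session_id: str, session: dict) -> None:
        self._sessions[session_id] = session
```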