luminoussg committed
Commit 9b382da · verified · 1 Parent(s): eebaa87

Update app.py

Files changed (1):
  1. app.py +106 -141
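The commit replaces the blocking `requests.post` calls against the Inference API with token streaming via `huggingface_hub.InferenceClient`. A minimal, standalone sketch of that streaming pattern (the model ID and prompt below are placeholders for illustration, not taken from app.py):

```python
import os
from huggingface_hub import InferenceClient

# Placeholder model; app.py uses its own MODEL_ENDPOINTS mapping instead.
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=os.getenv("HF_API_KEY"),
)

# Stream a chat completion chunk by chunk, as the new query_model() does.
for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```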
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import os
-import requests
-import time
+import threading
 from datetime import datetime
-from typing import List, Dict
-from session_manager import SessionManager # only if you need sessions
+from typing import List, Dict, Any, Generator
+from session_manager import SessionManager
+from huggingface_hub import InferenceClient
 
-# Initialize session manager and get HF API key (adjust if not using sessions)
+# Initialize session manager and get HF API key
 session_manager = SessionManager()
 HF_API_KEY = os.getenv("HF_API_KEY")
 
@@ -17,28 +17,22 @@ MODEL_ENDPOINTS = {
     "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
 }
 
-def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
-    """
-    Query a single model with the conversation so far (list of dicts with 'role' and 'content').
-    """
+def query_model(model_name: str, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
+    """Query a single model with the chat history and stream the response"""
     endpoint = MODEL_ENDPOINTS[model_name]
-    headers = {
-        "Authorization": f"Bearer {HF_API_KEY}",
-        "Content-Type": "application/json"
-    }
-
-    # Combine conversation into a single string (simple example)
-    conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
-
-    # Model-specific prompt formatting
+
+    # Build full conversation history for context
+    conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+
+    # Model-specific prompt formatting with full history
     model_prompts = {
         "Qwen2.5-72B-Instruct": (
-            f"<|im_start|>system\nCollaborate with other experts:\n{conversation}<|im_end|>\n"
+            f"<|im_start|>system\nCollaborate with other experts. Previous discussion:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nMy analysis:"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
-            f"Build on the conversation:\n{conversation}<|eot_id|>\n"
+            f"Build upon this discussion:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
@@ -47,141 +41,112 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
        )
    }
 
-    stop_sequences = {
-        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
-        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
-        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
-    }
-
-    payload = {
-        "inputs": model_prompts[model_name],
-        "parameters": {
-            "max_tokens": 1024,
-            "temperature": 0.7,
-            "stop_sequences": stop_sequences[model_name],
-            "return_full_text": False
-        }
-    }
+    client = InferenceClient(base_url=endpoint, token=HF_API_KEY)
 
    try:
-        response = requests.post(endpoint, json=payload, headers=headers)
-        response.raise_for_status()
-        generated = response.json()[0]["generated_text"]
-        # Clean up possible leftover tokens
-        generated = generated.split("<|")[0].strip()
-        return generated
+        stream = client.chat.completions.create(
+            messages=[{"role": "system", "content": model_prompts[model_name]}],
+            stream=True,
+            max_tokens=2048,
+            temperature=0.7,
+        )
+
+        for chunk in stream:
+            content = chunk.choices[0].delta.content or ""
+            yield content
+
    except Exception as e:
-        return f"{model_name} error: {str(e)}"
-
-
-def on_new_session():
-    """Create a new session and clear the chat."""
-    new_id = session_manager.create_session()
-    return new_id, []
-
-def user_message(user_msg, history, session_id):
-    """
-    After the user hits enter, append the user's message to the conversation.
-    Return updated conversation so the UI can display it.
-    """
-    if not user_msg.strip():
-        return "", history # if user didn't type anything
-    # Append the new user message to the conversation
-    history.append({"role": "user", "content": user_msg})
-    return "", history
-
-def bot_reply(history, session_id):
-    """
-    Stream the multi-model response. We rely on the *last* user message in `history`,
-    then call each model in turn, appending partial updates. Yields updated conversation each time.
-    """
-    if not history or history[-1]["role"] != "user":
-        return # There's no new user message to respond to
-
-    # Optionally load existing session, if you have session logic
-    session = session_manager.load_session(session_id) if session_id else None
-    if session is None:
+        yield f"{model_name} error: {str(e)}"
+
+def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
+    """Handle sequential model responses with context preservation and streaming"""
+    # Load or initialize session
+    session = session_manager.load_session(session_id)
+    if not isinstance(session, dict) or "history" not in session:
        session = {"history": []}
 
-    # 1) Qwen2.5-Coder-32B
-    # Add an assistant message placeholder
-    history.append({"role": "assistant", "content": "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."})
-    yield history
-
-    resp1 = query_model("Qwen2.5-Coder-32B-Instruct", history)
-    updated_content = f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{resp1}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # 2) Qwen2.5-72B
-    updated_content += "\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
-    history[-1]["content"] = updated_content
-    yield history
-
-    resp2 = query_model("Qwen2.5-72B-Instruct", history)
-    updated_content += f"\n\n🟣 **Qwen2.5-72B-Instruct**\n{resp2}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # 3) Llama3.3-70B
-    updated_content += "\n\n🟡 Llama3.3-70B-Instruct is thinking..."
-    history[-1]["content"] = updated_content
-    yield history
-
-    resp3 = query_model("Llama3.3-70B-Instruct", history)
-    updated_content += f"\n\n🟡 **Llama3.3-70B-Instruct**\n{resp3}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # Save session, if needed
-    session["history"] = history
+    # Build context from session history
+    messages = []
+    for entry in session["history"]:
+        if entry["type"] == "user":
+            messages.append({"role": "user", "content": entry["content"]})
+        else:
+            messages.append({"role": "assistant", "content": f"{entry['model']}: {entry['content']}"})
+
+    # Add current message
+    messages.append({"role": "user", "content": message})
+    session["history"].append({
+        "timestamp": datetime.now().isoformat(),
+        "type": "user",
+        "content": message
+    })
+
+    # Model responses
+    model_names = ["Qwen2.5-Coder-32B-Instruct", "Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct"]
+    model_colors = ["🔵", "🟣", "🟡"]
+    responses = {}
+
+    # Initialize responses
+    for model_name in model_names:
+        responses[model_name] = ""
+
+    # Stream responses from each model
+    for i, model_name in enumerate(model_names):
+        yield f"{model_colors[i]} {model_name} is thinking..."
+
+        full_response = ""
+        for chunk in query_model(model_name, messages):
+            full_response += chunk
+            yield f"{model_colors[i]} **{model_name}**\n{full_response}"
+
+        # Update session history and messages
+        session["history"].append({
+            "timestamp": datetime.now().isoformat(),
+            "type": "assistant",
+            "model": model_name,
+            "content": full_response
+        })
+        messages.append({"role": "assistant", "content": f"{model_name}: {full_response}"})
+        responses[model_name] = full_response
+
+    # Save final session state
    session_manager.save_session(session_id, session)
 
-def clear_chat():
-    """
-    Clears the Chatbot entirely (set it to an empty list).
-    """
-    return []
+    # Return final combined response (optional)
+    combined_response = ""
+    for i, model_name in enumerate(model_names):
+        combined_response += f"{model_colors[i]} **{model_name}**\n{responses[model_name]}\n\n"
+    yield combined_response
 
-# Build the Gradio Blocks interface
+# Create the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Multi-LLM Collaboration Chat (Streaming)")
+    gr.Markdown("## Multi-LLM Collaboration Chat")
 
    with gr.Row():
        session_id = gr.State(session_manager.create_session)
-        new_session_btn = gr.Button("🔄 New Session")
-
-    # Chatbot with "type='messages'" for streaming messages and LaTeX delimiters
-    chatbot = gr.Chatbot(
-        type="messages",
-        height=550,
-        latex_delimiters=[
-            {"left": "$", "right": "$", "display": False}, # inline math
-            {"left": "$$", "right": "$$", "display": True} # display math
-        ]
-    )
+        new_session = gr.Button("🔄 New Session")
 
-    msg = gr.Textbox(label="Your Message")
-    clear_btn = gr.Button("Clear")
-
-    # Wire up the events:
-    # 1) On user submit:
-    msg.submit(
-        fn=user_message,
-        inputs=[msg, chatbot, session_id],
-        outputs=[msg, chatbot],
-        queue=False
-    ).then(
-        fn=bot_reply,
-        inputs=[chatbot, session_id],
-        outputs=[chatbot]
-    )
+    chatbot = gr.Chatbot(height=600)
+    msg = gr.Textbox(label="Message")
 
-    # 2) On "Clear" click, empty the chat:
-    clear_btn.click(fn=clear_chat, outputs=chatbot, queue=False)
+    def on_new_session():
+        new_id = session_manager.create_session()
+        return new_id, []
 
-    # 3) On "New Session" click, get a fresh session ID and clear chat:
-    new_session_btn.click(fn=on_new_session, outputs=[session_id, chatbot], queue=False)
+    def user(message, history, session_id):
+        return "", history + [[message, None]]
+
+    def bot(history, session_id):
+        if history and history[-1][1] is None:
+            message = history[-1][0]
+            for response in respond(message, history[:-1], session_id):
+                history[-1][1] = response
+                yield history
+
+    msg.submit(user, [msg, chatbot, session_id], [msg, chatbot]).then(
+        bot, [chatbot, session_id], [chatbot]
+    )
+    new_session.click(on_new_session, None, [session_id, chatbot])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
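app.py imports `SessionManager` from a local `session_manager` module that is not part of this diff; both the old and the new version rely only on `create_session()`, `load_session()`, and `save_session()`. A minimal in-memory sketch of that assumed interface (hypothetical; the actual module in the Space may persist sessions differently):

```python
# session_manager.py (hypothetical sketch; only the methods app.py calls)
import uuid
from typing import Any, Dict, Optional

class SessionManager:
    def __init__(self) -> None:
        # Sessions kept in memory; a real implementation might write them to disk.
        self._sessions: Dict[str, Dict[str, Any]] = {}

    def create_session(self) -> str:
        """Return a fresh session ID with an empty history."""
        session_id = str(uuid.uuid4())
        self._sessions[session_id] = {"history": []}
        return session_id

    def load_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored session dict, or None if the ID is unknown."""
        return self._sessions.get(session_id)

    def save_session(self, session_id: str, session: Dict[str, Any]) -> None:
        """Overwrite the stored state for this session."""
        self._sessions[session_id] = session
```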