luminoussg committed
Commit 9b382da · verified · 1 Parent(s): eebaa87

Update app.py

Files changed (1):
  1. app.py +106 -141
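The commit replaces the blocking `requests.post` calls against the Inference API with token streaming via `huggingface_hub.InferenceClient`. A minimal, standalone sketch of that streaming pattern (the model ID and prompt below are placeholders for illustration, not taken from app.py):

```python
import os
from huggingface_hub import InferenceClient

# Placeholder model; app.py uses its own MODEL_ENDPOINTS mapping instead.
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=os.getenv("HF_API_KEY"),
)

# Stream a chat completion chunk by chunk, as the new query_model() does.
for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```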
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import os
-import requests
-import time
+import threading
 from datetime import datetime
-from typing import List, Dict
-from session_manager import SessionManager # only if you need sessions
+from typing import List, Dict, Any, Generator
+from session_manager import SessionManager
+from huggingface_hub import InferenceClient
 
-# Initialize session manager and get HF API key (adjust if not using sessions)
+# Initialize session manager and get HF API key
 session_manager = SessionManager()
 HF_API_KEY = os.getenv("HF_API_KEY")
 
@@ -17,28 +17,22 @@ MODEL_ENDPOINTS = {
     "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
 }
 
-def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
-    """
-    Query a single model with the conversation so far (list of dicts with 'role' and 'content').
-    """
+def query_model(model_name: str, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
+    """Query a single model with the chat history and stream the response"""
     endpoint = MODEL_ENDPOINTS[model_name]
-    headers = {
-        "Authorization": f"Bearer {HF_API_KEY}",
-        "Content-Type": "application/json"
-    }
-
-    # Combine conversation into a single string (simple example)
-    conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
-
-    # Model-specific prompt formatting
+
+    # Build full conversation history for context
+    conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+
+    # Model-specific prompt formatting with full history
     model_prompts = {
         "Qwen2.5-72B-Instruct": (
-            f"<|im_start|>system\nCollaborate with other experts:\n{conversation}<|im_end|>\n"
+            f"<|im_start|>system\nCollaborate with other experts. Previous discussion:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nMy analysis:"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
-            f"Build on the conversation:\n{conversation}<|eot_id|>\n"
+            f"Build upon this discussion:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
@@ -47,141 +41,112 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
        )
    }
 
-    stop_sequences = {
-        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
-        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
-        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
-    }
-
-    payload = {
-        "inputs": model_prompts[model_name],
-        "parameters": {
-            "max_tokens": 1024,
-            "temperature": 0.7,
-            "stop_sequences": stop_sequences[model_name],
-            "return_full_text": False
-        }
-    }
+    client = InferenceClient(base_url=endpoint, token=HF_API_KEY)
 
    try:
-        response = requests.post(endpoint, json=payload, headers=headers)
-        response.raise_for_status()
-        generated = response.json()[0]["generated_text"]
-        # Clean up possible leftover tokens
-        generated = generated.split("<|")[0].strip()
-        return generated
+        stream = client.chat.completions.create(
+            messages=[{"role": "system", "content": model_prompts[model_name]}],
+            stream=True,
+            max_tokens=2048,
+            temperature=0.7,
+        )
+
+        for chunk in stream:
+            content = chunk.choices[0].delta.content or ""
+            yield content
+
    except Exception as e:
-        return f"{model_name} error: {str(e)}"
-
-
-def on_new_session():
-    """Create a new session and clear the chat."""
-    new_id = session_manager.create_session()
-    return new_id, []
-
-def user_message(user_msg, history, session_id):
-    """
-    After the user hits enter, append the user's message to the conversation.
-    Return updated conversation so the UI can display it.
-    """
-    if not user_msg.strip():
-        return "", history # if user didn't type anything
-    # Append the new user message to the conversation
-    history.append({"role": "user", "content": user_msg})
-    return "", history
-
-def bot_reply(history, session_id):
-    """
-    Stream the multi-model response. We rely on the *last* user message in `history`,
-    then call each model in turn, appending partial updates. Yields updated conversation each time.
-    """
-    if not history or history[-1]["role"] != "user":
-        return # There's no new user message to respond to
-
-    # Optionally load existing session, if you have session logic
-    session = session_manager.load_session(session_id) if session_id else None
-    if session is None:
+        yield f"{model_name} error: {str(e)}"
+
+def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
+    """Handle sequential model responses with context preservation and streaming"""
+    # Load or initialize session
+    session = session_manager.load_session(session_id)
+    if not isinstance(session, dict) or "history" not in session:
        session = {"history": []}
 
-    # 1) Qwen2.5-Coder-32B
-    # Add an assistant message placeholder
-    history.append({"role": "assistant", "content": "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."})
-    yield history
-
-    resp1 = query_model("Qwen2.5-Coder-32B-Instruct", history)
-    updated_content = f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{resp1}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # 2) Qwen2.5-72B
-    updated_content += "\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
-    history[-1]["content"] = updated_content
-    yield history
-
-    resp2 = query_model("Qwen2.5-72B-Instruct", history)
-    updated_content += f"\n\n🟣 **Qwen2.5-72B-Instruct**\n{resp2}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # 3) Llama3.3-70B
-    updated_content += "\n\n🟡 Llama3.3-70B-Instruct is thinking..."
-    history[-1]["content"] = updated_content
-    yield history
-
-    resp3 = query_model("Llama3.3-70B-Instruct", history)
-    updated_content += f"\n\n🟡 **Llama3.3-70B-Instruct**\n{resp3}"
-    history[-1]["content"] = updated_content
-    yield history
-
-    # Save session, if needed
-    session["history"] = history
+    # Build context from session history
+    messages = []
+    for entry in session["history"]:
+        if entry["type"] == "user":
+            messages.append({"role": "user", "content": entry["content"]})
+        else:
+            messages.append({"role": "assistant", "content": f"{entry['model']}: {entry['content']}"})
+
+    # Add current message
+    messages.append({"role": "user", "content": message})
+    session["history"].append({
+        "timestamp": datetime.now().isoformat(),
+        "type": "user",
+        "content": message
+    })
+
+    # Model responses
+    model_names = ["Qwen2.5-Coder-32B-Instruct", "Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct"]
+    model_colors = ["🔵", "🟣", "🟡"]
+    responses = {}
+
+    # Initialize responses
+    for model_name in model_names:
+        responses[model_name] = ""
+
+    # Stream responses from each model
+    for i, model_name in enumerate(model_names):
+        yield f"{model_colors[i]} {model_name} is thinking..."
+
+        full_response = ""
+        for chunk in query_model(model_name, messages):
+            full_response += chunk
+            yield f"{model_colors[i]} **{model_name}**\n{full_response}"
+
+        # Update session history and messages
+        session["history"].append({
+            "timestamp": datetime.now().isoformat(),
+            "type": "assistant",
+            "model": model_name,
+            "content": full_response
+        })
+        messages.append({"role": "assistant", "content": f"{model_name}: {full_response}"})
+        responses[model_name] = full_response
+
+    # Save final session state
    session_manager.save_session(session_id, session)
 
-def clear_chat():
-    """
-    Clears the Chatbot entirely (set it to an empty list).
-    """
-    return []
+    # Return final combined response (optional)
+    combined_response = ""
+    for i, model_name in enumerate(model_names):
+        combined_response += f"{model_colors[i]} **{model_name}**\n{responses[model_name]}\n\n"
+    yield combined_response
 
-# Build the Gradio Blocks interface
+# Create the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Multi-LLM Collaboration Chat (Streaming)")
+    gr.Markdown("## Multi-LLM Collaboration Chat")
 
    with gr.Row():
        session_id = gr.State(session_manager.create_session)
-        new_session_btn = gr.Button("🔄 New Session")
-
-    # Chatbot with "type='messages'" for streaming messages and LaTeX delimiters
-    chatbot = gr.Chatbot(
-        type="messages",
-        height=550,
-        latex_delimiters=[
-            {"left": "$", "right": "$", "display": False}, # inline math
-            {"left": "$$", "right": "$$", "display": True} # display math
-        ]
-    )
+        new_session = gr.Button("🔄 New Session")
 
-    msg = gr.Textbox(label="Your Message")
-    clear_btn = gr.Button("Clear")
-
-    # Wire up the events:
-    # 1) On user submit:
-    msg.submit(
-        fn=user_message,
-        inputs=[msg, chatbot, session_id],
-        outputs=[msg, chatbot],
-        queue=False
-    ).then(
-        fn=bot_reply,
-        inputs=[chatbot, session_id],
-        outputs=[chatbot]
-    )
+    chatbot = gr.Chatbot(height=600)
+    msg = gr.Textbox(label="Message")
 
-    # 2) On "Clear" click, empty the chat:
-    clear_btn.click(fn=clear_chat, outputs=chatbot, queue=False)
+    def on_new_session():
+        new_id = session_manager.create_session()
+        return new_id, []
 
-    # 3) On "New Session" click, get a fresh session ID and clear chat:
-    new_session_btn.click(fn=on_new_session, outputs=[session_id, chatbot], queue=False)
+    def user(message, history, session_id):
+        return "", history + [[message, None]]
+
+    def bot(history, session_id):
+        if history and history[-1][1] is None:
+            message = history[-1][0]
+            for response in respond(message, history[:-1], session_id):
+                history[-1][1] = response
+                yield history
+
+    msg.submit(user, [msg, chatbot, session_id], [msg, chatbot]).then(
+        bot, [chatbot, session_id], [chatbot]
+    )
+    new_session.click(on_new_session, None, [session_id, chatbot])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
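app.py imports `SessionManager` from a local `session_manager` module that is not part of this diff; both the old and the new version rely only on `create_session()`, `load_session()`, and `save_session()`. A minimal in-memory sketch of that assumed interface (hypothetical; the actual module in the Space may persist sessions differently):

```python
# session_manager.py (hypothetical sketch; only the methods app.py calls)
import uuid
from typing import Any, Dict, Optional

class SessionManager:
    def __init__(self) -> None:
        # Sessions kept in memory; a real implementation might write them to disk.
        self._sessions: Dict[str, Dict[str, Any]] = {}

    def create_session(self) -> str:
        """Return a fresh session ID with an empty history."""
        session_id = str(uuid.uuid4())
        self._sessions[session_id] = {"history": []}
        return session_id

    def load_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored session dict, or None if the ID is unknown."""
        return self._sessions.get(session_id)

    def save_session(self, session_id: str, session: Dict[str, Any]) -> None:
        """Overwrite the stored state for this session."""
        self._sessions[session_id] = session
```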