luminoussg committed
Commit eebaa87 · verified · 1 Parent(s): 8190eb3

Update app.py

Files changed (1):
  app.py  +117 -110
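
Note: app.py imports SessionManager from session_manager.py, which this commit does not touch. A minimal sketch of the interface the diff below relies on (create_session(), load_session(session_id) returning None for unknown sessions, and save_session(session_id, session)) might look like the following; the JSON-file storage is an illustrative assumption, not necessarily the repo's actual implementation:

# session_manager.py -- hypothetical sketch; only the three method names and
# their rough contracts are inferred from app.py itself.
import json
import uuid
from pathlib import Path
from typing import Optional

class SessionManager:
    def __init__(self, root: str = "sessions"):
        self.root = Path(root)
        self.root.mkdir(exist_ok=True)

    def create_session(self) -> str:
        # Used as the initial value of gr.State; Gradio calls it on page load.
        return uuid.uuid4().hex

    def load_session(self, session_id: str) -> Optional[dict]:
        # bot_reply treats an unknown session as None, so mirror that here.
        path = self.root / f"{session_id}.json"
        return json.loads(path.read_text()) if path.exists() else None

    def save_session(self, session_id: str, session: dict) -> None:
        # Persist the whole session dict (the message dicts are JSON-serializable).
        (self.root / f"{session_id}.json").write_text(json.dumps(session))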
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import os
 import requests
-import threading
+import time
 from datetime import datetime
-from typing import List, Dict, Any, Generator
-from session_manager import SessionManager
+from typing import List, Dict
+from session_manager import SessionManager  # only if you need sessions
 
-# Initialize session manager and get HF API key
+# Initialize session manager and get HF API key (adjust if not using sessions)
 session_manager = SessionManager()
 HF_API_KEY = os.getenv("HF_API_KEY")
 
@@ -18,25 +18,27 @@ MODEL_ENDPOINTS = {
 }
 
 def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
-    """Query a single model with the chat history"""
+    """
+    Query a single model with the conversation so far (list of dicts with 'role' and 'content').
+    """
     endpoint = MODEL_ENDPOINTS[model_name]
     headers = {
         "Authorization": f"Bearer {HF_API_KEY}",
         "Content-Type": "application/json"
     }
-
-    # Build full conversation history for context
-    conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-
-    # Model-specific prompt formatting with full history
+
+    # Combine conversation into a single string (simple example)
+    conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+
+    # Model-specific prompt formatting
     model_prompts = {
         "Qwen2.5-72B-Instruct": (
-            f"<|im_start|>system\nCollaborate with other experts. Previous discussion:\n{conversation}<|im_end|>\n"
+            f"<|im_start|>system\nCollaborate with other experts:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nMy analysis:"
         ),
         "Llama3.3-70B-Instruct": (
             "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
-            f"Build upon this discussion:\n{conversation}<|eot_id|>\n"
+            f"Build on the conversation:\n{conversation}<|eot_id|>\n"
             "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
         ),
         "Qwen2.5-Coder-32B-Instruct": (
@@ -45,7 +47,6 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
         )
     }
 
-    # Model-specific stop sequences
     stop_sequences = {
         "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
         "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
@@ -55,126 +56,132 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
     payload = {
         "inputs": model_prompts[model_name],
         "parameters": {
-            "max_tokens": 2048,
+            "max_tokens": 1024,
             "temperature": 0.7,
             "stop_sequences": stop_sequences[model_name],
             "return_full_text": False
         }
     }
-
+
     try:
         response = requests.post(endpoint, json=payload, headers=headers)
         response.raise_for_status()
-        result = response.json()[0]['generated_text']
-        # Clean up response formatting
-        result = result.split('<|')[0]  # Remove any remaining special tokens
-        result = result.replace('**', '').replace('##', '')  # Remove markdown emphasis
-        result = result.strip()  # Remove leading/trailing whitespace
-        return result
+        generated = response.json()[0]["generated_text"]
+        # Clean up possible leftover tokens
+        generated = generated.split("<|")[0].strip()
+        return generated
     except Exception as e:
         return f"{model_name} error: {str(e)}"
 
-def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
-    """Handle sequential model responses with context preservation"""
-    # Load or initialize session
-    session = session_manager.load_session(session_id)
-    if not isinstance(session, dict) or "history" not in session:
+
+def on_new_session():
+    """Create a new session and clear the chat."""
+    new_id = session_manager.create_session()
+    return new_id, []
+
+def user_message(user_msg, history, session_id):
+    """
+    After the user hits enter, append the user's message to the conversation.
+    Return updated conversation so the UI can display it.
+    """
+    if not user_msg.strip():
+        return "", history  # if user didn't type anything
+    # Append the new user message to the conversation
+    history.append({"role": "user", "content": user_msg})
+    return "", history
+
+def bot_reply(history, session_id):
+    """
+    Stream the multi-model response. We rely on the *last* user message in `history`,
+    then call each model in turn, appending partial updates. Yields updated conversation each time.
+    """
+    if not history or history[-1]["role"] != "user":
+        return  # There's no new user message to respond to
+
+    # Optionally load existing session, if you have session logic
+    session = session_manager.load_session(session_id) if session_id else None
+    if session is None:
         session = {"history": []}
-
-    # Build context from session history
-    messages = []
-    for entry in session["history"]:
-        if entry["type"] == "user":
-            messages.append({"role": "user", "content": entry["content"]})
-        else:
-            messages.append({"role": "assistant", "content": f"{entry['model']}: {entry['content']}"})
-
-    # Add current message
-    messages.append({"role": "user", "content": message})
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "user",
-        "content": message
-    })
-
-    # First model
-    yield "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."
-    response1 = query_model("Qwen2.5-Coder-32B-Instruct", messages)
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "assistant",
-        "model": "Qwen2.5-Coder-32B-Instruct",
-        "content": response1
-    })
-    messages.append({"role": "assistant", "content": f"Qwen2.5-Coder-32B-Instruct: {response1}"})
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}"
-
-    # Second model
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
-    response2 = query_model("Qwen2.5-72B-Instruct", messages)
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "assistant",
-        "model": "Qwen2.5-72B-Instruct",
-        "content": response2
-    })
-    messages.append({"role": "assistant", "content": f"Qwen2.5-72B-Instruct: {response2}"})
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}"
-
-    # Final model
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}\n\n🟡 Llama3.3-70B-Instruct is thinking..."
-    response3 = query_model("Llama3.3-70B-Instruct", messages)
-    session["history"].append({
-        "timestamp": datetime.now().isoformat(),
-        "type": "assistant",
-        "model": "Llama3.3-70B-Instruct",
-        "content": response3
-    })
-    messages.append({"role": "assistant", "content": f"Llama3.3-70B-Instruct: {response3}"})
-
-    # Save final session state
+
+    # 1) Qwen2.5-Coder-32B
+    # Add an assistant message placeholder
+    history.append({"role": "assistant", "content": "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."})
+    yield history
+
+    resp1 = query_model("Qwen2.5-Coder-32B-Instruct", history)
+    updated_content = f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{resp1}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # 2) Qwen2.5-72B
+    updated_content += "\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
+    history[-1]["content"] = updated_content
+    yield history
+
+    resp2 = query_model("Qwen2.5-72B-Instruct", history)
+    updated_content += f"\n\n🟣 **Qwen2.5-72B-Instruct**\n{resp2}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # 3) Llama3.3-70B
+    updated_content += "\n\n🟡 Llama3.3-70B-Instruct is thinking..."
+    history[-1]["content"] = updated_content
+    yield history
+
+    resp3 = query_model("Llama3.3-70B-Instruct", history)
+    updated_content += f"\n\n🟡 **Llama3.3-70B-Instruct**\n{resp3}"
+    history[-1]["content"] = updated_content
+    yield history
+
+    # Save session, if needed
+    session["history"] = history
     session_manager.save_session(session_id, session)
-
-    # Return final combined response
-    yield f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{response1}\n\n🟣 **Qwen2.5-72B-Instruct**\n{response2}\n\n🟡 **Llama3.3-70B-Instruct**\n{response3}"
 
-# Create the Gradio interface
+def clear_chat():
+    """
+    Clears the Chatbot entirely (set it to an empty list).
+    """
+    return []
+
+# Build the Gradio Blocks interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Multi-LLM Collaboration Chat")
-
+    gr.Markdown("## Multi-LLM Collaboration Chat (Streaming)")
+
     with gr.Row():
         session_id = gr.State(session_manager.create_session)
-        new_session = gr.Button("🔄 New Session")
-
-    # Add latex_delimiters to enable LaTeX rendering
+        new_session_btn = gr.Button("🔄 New Session")
+
+    # Chatbot with "type='messages'" for streaming messages and LaTeX delimiters
     chatbot = gr.Chatbot(
-        height=600,
+        type="messages",
+        height=550,
         latex_delimiters=[
             {"left": "$", "right": "$", "display": False},  # inline math
             {"left": "$$", "right": "$$", "display": True}  # display math
         ]
     )
-
-    msg = gr.Textbox(label="Message")
-
-    def on_new_session():
-        new_id = session_manager.create_session()
-        return new_id, []
-
-    def user(message, history, session_id):
-        return "", history + [[message, None]]
-
-    def bot(history, session_id):
-        if history and history[-1][1] is None:
-            message = history[-1][0]
-            for response in respond(message, history[:-1], session_id):
-                history[-1][1] = response
-                yield history
-
-    msg.submit(user, [msg, chatbot, session_id], [msg, chatbot]).then(
-        bot, [chatbot, session_id], [chatbot]
+
+    msg = gr.Textbox(label="Your Message")
+    clear_btn = gr.Button("Clear")
+
+    # Wire up the events:
+    # 1) On user submit:
+    msg.submit(
+        fn=user_message,
+        inputs=[msg, chatbot, session_id],
+        outputs=[msg, chatbot],
+        queue=False
+    ).then(
+        fn=bot_reply,
+        inputs=[chatbot, session_id],
+        outputs=[chatbot]
     )
-    new_session.click(on_new_session, None, [session_id, chatbot])
+
+    # 2) On "Clear" click, empty the chat:
+    clear_btn.click(fn=clear_chat, outputs=chatbot, queue=False)
+
+    # 3) On "New Session" click, get a fresh session ID and clear chat:
+    new_session_btn.click(fn=on_new_session, outputs=[session_id, chatbot], queue=False)
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch()
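
For a quick sanity check outside the Gradio UI, the updated query_model can be called directly. This sketch assumes HF_API_KEY is set in the environment and that MODEL_ENDPOINTS (elided from the diff) maps the three model names to live text-generation endpoints; on an HTTP or parsing failure, query_model returns an error string instead of raising:

# smoke_test.py -- illustrative only; importing app builds the Blocks UI
# but does not launch it, since demo.launch() is guarded by __main__.
from app import query_model

history = [{"role": "user", "content": "Summarize the Pythagorean theorem."}]
print(query_model("Qwen2.5-Coder-32B-Instruct", history))
# Prints generated text, or "Qwen2.5-Coder-32B-Instruct error: ..." on failure.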