Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,11 @@ import os
|
|
3 |
import requests
|
4 |
import threading
|
5 |
from datetime import datetime
|
6 |
-
from typing import List, Dict, Any
|
|
|
7 |
|
8 |
-
#
|
|
|
9 |
HF_API_KEY = os.getenv("HF_API_KEY")
|
10 |
|
11 |
# Model endpoints configuration
|
@@ -72,21 +74,29 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
|
|
72 |
except Exception as e:
|
73 |
return f"{model_name} error: {str(e)}"
|
74 |
|
75 |
-
def respond(message: str, history: List[List[str]], session_id: str) -> str:
|
76 |
"""Handle sequential model responses with session tracking"""
|
77 |
# Load session history
|
78 |
session = session_manager.load_session(session_id)
|
79 |
messages = [{"role": "user", "content": message}]
|
80 |
|
81 |
-
# Store user message
|
82 |
session["history"].append({
|
83 |
"timestamp": datetime.now().isoformat(),
|
84 |
"type": "user",
|
85 |
"content": message
|
86 |
})
|
|
|
87 |
|
88 |
# Get first model's response
|
89 |
response1 = query_model("Qwen2.5-Coder-32B-Instruct", messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
yield f"**Qwen2.5-Coder-32B-Instruct**:\n{response1}"
|
91 |
|
92 |
# Add first response to context
|
@@ -97,6 +107,13 @@ def respond(message: str, history: List[List[str]], session_id: str) -> str:
|
|
97 |
|
98 |
# Get second model's response
|
99 |
response2 = query_model("Qwen2.5-72B-Instruct", messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
yield f"**Qwen2.5-72B-Instruct**:\n{response2}"
|
101 |
|
102 |
# Add second response to context
|
@@ -107,6 +124,13 @@ def respond(message: str, history: List[List[str]], session_id: str) -> str:
|
|
107 |
|
108 |
# Get final model's response
|
109 |
response3 = query_model("Llama3.3-70B-Instruct", messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
yield f"**Llama3.3-70B-Instruct**:\n{response3}"
|
111 |
|
112 |
# Create the Gradio interface with session management
|
@@ -137,4 +161,4 @@ with gr.Blocks(title="Multi-LLM Collaboration Chat") as demo:
|
|
137 |
)
|
138 |
|
139 |
if __name__ == "__main__":
|
140 |
-
|
|
|
3 |
import requests
|
4 |
import threading
|
5 |
from datetime import datetime
|
6 |
+
from typing import List, Dict, Any, Generator
|
7 |
+
from session_manager import SessionManager
|
8 |
|
9 |
+
# Initialize session manager and get HF API key
|
10 |
+
session_manager = SessionManager()
|
11 |
HF_API_KEY = os.getenv("HF_API_KEY")
|
12 |
|
13 |
# Model endpoints configuration
|
|
|
74 |
except Exception as e:
|
75 |
return f"{model_name} error: {str(e)}"
|
76 |
|
77 |
+
def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
|
78 |
"""Handle sequential model responses with session tracking"""
|
79 |
# Load session history
|
80 |
session = session_manager.load_session(session_id)
|
81 |
messages = [{"role": "user", "content": message}]
|
82 |
|
83 |
+
# Store user message and update session
|
84 |
session["history"].append({
|
85 |
"timestamp": datetime.now().isoformat(),
|
86 |
"type": "user",
|
87 |
"content": message
|
88 |
})
|
89 |
+
session_manager.save_session(session_id, session)
|
90 |
|
91 |
# Get first model's response
|
92 |
response1 = query_model("Qwen2.5-Coder-32B-Instruct", messages)
|
93 |
+
session["history"].append({
|
94 |
+
"timestamp": datetime.now().isoformat(),
|
95 |
+
"type": "assistant",
|
96 |
+
"model": "Qwen2.5-Coder-32B-Instruct",
|
97 |
+
"content": response1
|
98 |
+
})
|
99 |
+
session_manager.save_session(session_id, session)
|
100 |
yield f"**Qwen2.5-Coder-32B-Instruct**:\n{response1}"
|
101 |
|
102 |
# Add first response to context
|
|
|
107 |
|
108 |
# Get second model's response
|
109 |
response2 = query_model("Qwen2.5-72B-Instruct", messages)
|
110 |
+
session["history"].append({
|
111 |
+
"timestamp": datetime.now().isoformat(),
|
112 |
+
"type": "assistant",
|
113 |
+
"model": "Qwen2.5-72B-Instruct",
|
114 |
+
"content": response2
|
115 |
+
})
|
116 |
+
session_manager.save_session(session_id, session)
|
117 |
yield f"**Qwen2.5-72B-Instruct**:\n{response2}"
|
118 |
|
119 |
# Add second response to context
|
|
|
124 |
|
125 |
# Get final model's response
|
126 |
response3 = query_model("Llama3.3-70B-Instruct", messages)
|
127 |
+
session["history"].append({
|
128 |
+
"timestamp": datetime.now().isoformat(),
|
129 |
+
"type": "assistant",
|
130 |
+
"model": "Llama3.3-70B-Instruct",
|
131 |
+
"content": response3
|
132 |
+
})
|
133 |
+
session_manager.save_session(session_id, session)
|
134 |
yield f"**Llama3.3-70B-Instruct**:\n{response3}"
|
135 |
|
136 |
# Create the Gradio interface with session management
|
|
|
161 |
)
|
162 |
|
163 |
if __name__ == "__main__":
|
164 |
+
demo.launch(share=True)
|