import os
from datetime import datetime
from typing import Dict, Generator, List

import docx
import gradio as gr
import pdfplumber
from huggingface_hub import InferenceClient
from openpyxl import load_workbook

from session_manager import SessionManager
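# NOTE: session_manager is a separate local module (not shown here). Based on how it is
# used below, it is assumed to expose create_session() -> str, load_session(session_id) -> dict,
# and save_session(session_id, session_dict).
# Assumed third-party dependencies (from the imports above): gradio, huggingface_hub,
# pdfplumber, python-docx, openpyxl.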
# Initialize session manager and get HF API key
session_manager = SessionManager()
HF_API_KEY = os.getenv("HF_API_KEY")
# Create uploads directory if it doesn't exist
os.makedirs("uploads", exist_ok=True)
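# NOTE: in the code below, uploaded files are read directly from the temporary paths that
# Gradio provides; nothing is copied into the "uploads" directory created above.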
# Model endpoints configuration
MODEL_ENDPOINTS = {
    "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
    "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
    "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
}
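# NOTE: these are Hugging Face serverless Inference API endpoints; query_model() below
# points an InferenceClient at one of them via base_url and uses the chat-completions API.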
def query_model(model_name: str, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
    """Query a single model with the chat history and stream the response."""
    endpoint = MODEL_ENDPOINTS[model_name]

    # Flatten the full conversation history into a single string for context
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Per-model system prompt templates; the {conversation} placeholder is filled in below
    system_prompts = {
        "Qwen2.5-72B-Instruct": "Collaborate with other experts. Previous discussion:\n{conversation}",
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            "Build upon this discussion:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            "<|im_start|>system\nTechnical discussion context:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nTechnical perspective:"
        ),
    }

    client = InferenceClient(base_url=endpoint, token=HF_API_KEY)

    try:
        chat_messages = [
            {"role": "system", "content": system_prompts[model_name].format(conversation=conversation)},
            {"role": "user", "content": "Continue the expert discussion"},
        ]
        stream = client.chat.completions.create(
            messages=chat_messages,
            stream=True,
            max_tokens=2048,
            temperature=0.5,
            top_p=0.7,
        )
        for chunk in stream:
            content = chunk.choices[0].delta.content or ""
            yield content
    except Exception as e:
        yield f"{model_name} error: {str(e)}"
def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
    """Handle sequential model responses with context preservation and streaming."""
    # Load or initialize session
    session = session_manager.load_session(session_id)
    if not isinstance(session, dict) or "history" not in session:
        session = {"history": []}

    # Build chat context from session history
    messages = []
    for entry in session["history"]:
        if entry["type"] == "user":
            messages.append({"role": "user", "content": entry["content"]})
        else:
            messages.append({"role": "assistant", "content": f"{entry['model']}: {entry['content']}"})

    # Add the current user message to the context and session history, unless the UI handler
    # (user() below) has already stored it for this turn; this avoids logging it twice.
    last_entry = session["history"][-1] if session["history"] else None
    already_logged = (
        last_entry is not None
        and last_entry.get("type") == "user"
        and last_entry.get("content") == message
    )
    if not already_logged:
        messages.append({"role": "user", "content": message})
        session["history"].append({
            "timestamp": datetime.now().isoformat(),
            "type": "user",
            "content": message
        })
    # Models answer in sequence, each seeing the previous models' answers
    model_names = ["Qwen2.5-Coder-32B-Instruct", "Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct"]
    model_colors = ["🔵", "🟣", "🟡"]
    responses = {model_name: "" for model_name in model_names}

    # Stream responses from each model
    for i, model_name in enumerate(model_names):
        yield f"{model_colors[i]} {model_name} is thinking..."
        full_response = ""
        for chunk in query_model(model_name, messages):
            full_response += chunk
            yield f"{model_colors[i]} **{model_name}**\n{full_response}"

        # Update session history and the shared context with this model's answer
        session["history"].append({
            "timestamp": datetime.now().isoformat(),
            "type": "assistant",
            "model": model_name,
            "content": full_response
        })
        messages.append({"role": "assistant", "content": f"{model_name}: {full_response}"})
        responses[model_name] = full_response

    # Save final session state
    session_manager.save_session(session_id, session)

    # Yield the final combined response from all models
    combined_response = ""
    for i, model_name in enumerate(model_names):
        combined_response += f"{model_colors[i]} **{model_name}**\n{responses[model_name]}\n\n"
    yield combined_response
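# Shape of the persisted session (assumed from the code above):
#     {"history": [{"timestamp": ISO-8601 str, "type": "user" | "assistant",
#                   "model": <model name, assistant entries only>, "content": str}, ...]}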
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Multi-LLM Collaboration Chat")

    with gr.Row():
        session_id = gr.State(session_manager.create_session)
        new_session = gr.Button("🔄 New Session")

    chatbot = gr.Chatbot(height=600)
    save_history = gr.Checkbox(label="Save Conversation History", value=True)

    def on_new_session():
        """Start a fresh session and clear the chat window."""
        new_id = session_manager.create_session()
        return new_id, []

    def user(message, files, history, session_id, save_history):
        """Append uploaded file contents to the message and add the turn to the chat history."""
        if files:
            for file_path in files:
                try:
                    file_extension = os.path.splitext(file_path)[1].lower()
                    file_content = ""
                    if file_extension == ".pdf":
                        with pdfplumber.open(file_path) as pdf:
                            for page in pdf.pages:
                                # extract_text() may return None for image-only pages
                                file_content += page.extract_text() or ""
                    elif file_extension == ".docx":
                        doc = docx.Document(file_path)
                        for paragraph in doc.paragraphs:
                            file_content += paragraph.text + "\n"
                    elif file_extension == ".xlsx":
                        workbook = load_workbook(file_path)
                        for sheet in workbook.sheetnames:
                            worksheet = workbook[sheet]
                            for row in worksheet.iter_rows():
                                row_values = [str(cell.value) if cell.value is not None else "" for cell in row]
                                file_content += ", ".join(row_values) + "\n"
                    else:
                        message += f"\nUnsupported file type: {file_extension}"
                        continue
                    message += f"\nFile content from {file_path}:\n{file_content}"
                except Exception as e:
                    message += f"\nError processing {file_path}: {str(e)}"

        if save_history:
            session = session_manager.load_session(session_id)
            if not isinstance(session, dict) or "history" not in session:
                session = {"history": []}
            session["history"].append({
                "timestamp": datetime.now().isoformat(),
                "type": "user",
                "content": message
            })
            session_manager.save_session(session_id, session)

        return "", history + [[message, None]]

    def bot(history, session_id):
        """Stream the multi-model response into the last chat turn."""
        if history and history[-1][1] is None:
            message = history[-1][0]
            for response in respond(message, history[:-1], session_id):
                history[-1][1] = response
                yield history

    with gr.Row():
        msg = gr.Textbox(label="Message")
        file_upload = gr.File(file_types=[".pdf", ".docx", ".xlsx"], file_count="multiple")

    msg.submit(user, [msg, file_upload, chatbot, session_id, save_history], [msg, chatbot]).then(
        bot, [chatbot, session_id], [chatbot]
    )
    new_session.click(on_new_session, None, [session_id, chatbot])

if __name__ == "__main__":
    demo.launch(share=True)