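"""Multi-LLM collaboration chat.

A Gradio app that relays each user message (plus text extracted from any
uploaded PDF/DOCX/XLSX files) through three Hugging Face Inference API
models in sequence, streaming each reply and persisting the conversation
through a local SessionManager.
"""
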
import gradio as gr
import os
import pdfplumber
import docx
from openpyxl import load_workbook
from datetime import datetime
from typing import List, Dict, Generator
from session_manager import SessionManager
from huggingface_hub import InferenceClient

# Initialize session manager and get HF API key
session_manager = SessionManager()
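# SessionManager is a local module (not shown here); this file assumes it
# exposes create_session() -> str, load_session(session_id) -> dict, and
# save_session(session_id, session) -> None, which are the only calls made below.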
HF_API_KEY = os.getenv("HF_API_KEY")

# Create uploads directory if it doesn't exist
os.makedirs("uploads", exist_ok=True)

# Model endpoints configuration
MODEL_ENDPOINTS = {
    "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
    "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
    "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
}
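# NOTE: the keys above must match both `model_names` in respond() and the
# system prompt templates in query_model(); adding a model means updating
# all three places.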

def query_model(model_name: str, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
    """Query a single model with the chat history and stream the response"""
    endpoint = MODEL_ENDPOINTS[model_name]
    
    # Build full conversation history for context
    conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
    
    # System prompt templates; `{conversation}` is filled in via .format()
    # below. Plain (non-f) strings are used so that literal braces inside the
    # conversation text cannot break the substitution.
    system_prompts = {
        "Qwen2.5-72B-Instruct": "Collaborate with other experts. Previous discussion:\n{conversation}",
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            "Build upon this discussion:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            "<|im_start|>system\nTechnical discussion context:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nTechnical perspective:"
        )
    }
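    # The chat-template tokens embedded above (<|im_start|>, <|eot_id|>, ...)
    # are sent as literal text inside the system message; the chat.completions
    # endpoint applies each model's real chat template server-side.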

    client = InferenceClient(base_url=endpoint, token=HF_API_KEY)

    try:
        # Build the request payload under a new name so the `messages`
        # parameter is not shadowed
        chat_messages = [
            {"role": "system", "content": system_prompts[model_name].format(conversation=conversation)},
            {"role": "user", "content": "Continue the expert discussion"}
        ]

        stream = client.chat.completions.create(
            messages=chat_messages,
            stream=True,
            max_tokens=2048,
            temperature=0.5,
            top_p=0.7
        )

        for chunk in stream:
            content = chunk.choices[0].delta.content or ""
            yield content

    except Exception as e:
        yield f"{model_name} error: {str(e)}"

def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
    """Handle sequential model responses with context preservation and streaming"""
    # Load or initialize session
    session = session_manager.load_session(session_id)
    if not isinstance(session, dict) or "history" not in session:
        session = {"history": []}

    # Build context from session history
    messages = []
    for entry in session["history"]:
        if entry["type"] == "user":
            messages.append({"role": "user", "content": entry["content"]})
        else:
            messages.append({"role": "assistant", "content": f"{entry['model']}: {entry['content']}"})

    # Add the current message unless user() already appended it to the
    # session (it does when "Save Conversation History" is checked), which
    # would otherwise record the turn twice.
    last = session["history"][-1] if session["history"] else None
    if not (last and last["type"] == "user" and last["content"] == message):
        messages.append({"role": "user", "content": message})
        session["history"].append({
            "timestamp": datetime.now().isoformat(),
            "type": "user",
            "content": message
        })

    # Model responses
    model_names = ["Qwen2.5-Coder-32B-Instruct", "Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct"]
    model_colors = ["πŸ”΅", "🟣", "🟑"]
    responses = {}

    # Initialize responses
    for model_name in model_names:
        responses[model_name] = ""

    # Stream responses from each model
    for i, model_name in enumerate(model_names):
        yield f"{model_colors[i]} {model_name} is thinking..."
        
        full_response = ""
        for chunk in query_model(model_name, messages):
            full_response += chunk
            yield f"{model_colors[i]} **{model_name}**\n{full_response}"

        # Update session history and messages
        session["history"].append({
            "timestamp": datetime.now().isoformat(),
            "type": "assistant",
            "model": model_name,
            "content": full_response
        })
        messages.append({"role": "assistant", "content": f"{model_name}: {full_response}"})
        responses[model_name] = full_response

    # Save final session state
    session_manager.save_session(session_id, session)

    # Return final combined response (optional)
    combined_response = ""
    for i, model_name in enumerate(model_names):
        combined_response += f"{model_colors[i]} **{model_name}**\n{responses[model_name]}\n\n"
    yield combined_response

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Multi-LLM Collaboration Chat")

    with gr.Row():
        session_id = gr.State(session_manager.create_session)
        new_session = gr.Button("πŸ”„ New Session")

    chatbot = gr.Chatbot(height=600)
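    # The chatbot holds history as [user, assistant] pairs, the format that
    # user() and bot() below read and mutate.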
    save_history = gr.Checkbox(label="Save Conversation History", value=True)

    def on_new_session():
        new_id = session_manager.create_session()
        return new_id, []

    def user(message, files, history, session_id, save_history):
        if files:
            for file_path in files:
                try:
                    file_extension = os.path.splitext(file_path)[1].lower()
                    file_content = ""

                    if file_extension == ".pdf":
                        with pdfplumber.open(file_path) as pdf:
                            for page in pdf.pages:
                                # extract_text() returns None for pages with no text layer
                                file_content += page.extract_text() or ""
                    elif file_extension == ".docx":
                        doc = docx.Document(file_path)
                        for paragraph in doc.paragraphs:
                            file_content += paragraph.text + "\n"
                    elif file_extension == ".xlsx":
                        workbook = load_workbook(file_path)
                        for sheet in workbook.sheetnames:
                            worksheet = workbook[sheet]
                            for row in worksheet.iter_rows():
                                # Empty cells hold None; render them as blanks
                                row_values = [str(cell.value) if cell.value is not None else "" for cell in row]
                                file_content += ", ".join(row_values) + "\n"
                    else:
                        message += f"\nUnsupported file type: {file_extension}"
                        continue

                    message += f"\nFile content from {file_path}:\n{file_content}"

                except Exception as e:
                    message += f"\nError processing {file_path}: {str(e)}"

        if save_history:
            session = session_manager.load_session(session_id)
            session["history"].append({
                "timestamp": datetime.now().isoformat(),
                "type": "user",
                "content": message
            })
            session_manager.save_session(session_id, session)

        # Clear the textbox and append the user turn even when history saving
        # is disabled; both outputs must always be returned.
        return "", history + [[message, None]]


    def bot(history, session_id):
        if history and history[-1][1] is None:
            message = history[-1][0]
            for response in respond(message, history[:-1], session_id):
                history[-1][1] = response
                yield history

    with gr.Row():
        msg = gr.Textbox(label="Message")
        file_upload = gr.File(file_types=[".pdf", ".docx", ".xlsx"], file_count="multiple")

    msg.submit(user, [msg, file_upload, chatbot, session_id, save_history], [msg, chatbot]).then(
        bot, [chatbot, session_id], [chatbot]
    )
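    # Submitting a message first runs user() (clears the textbox, records the
    # turn), then bot() streams the three model replies into the last chat row.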
    new_session.click(on_new_session, None, [session_id, chatbot])

if __name__ == "__main__":
    demo.launch(share=True)