import gradio as gr
import os
from typing import List, Tuple
import json
import time

# Configure the model and provider
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq"  # Can be changed to fireworks, hyperbolic, etc.

# System prompts for different modes
SYSTEM_PROMPTS = {
    "default": "You are a helpful AI assistant.",
    "creative": "You are a creative and imaginative AI that thinks outside the box.",
    "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
    "concise": "You are a concise AI that provides brief, to-the-point responses.",
    "teacher": "You are a patient teacher who explains concepts clearly with examples.",
    "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}

# CSS for dark theme and custom styling
custom_css = """
#chatbot {
    height: 600px !important;
    background: #0a0a0a;
}
#chatbot .message {
    font-size: 14px;
    line-height: 1.6;
}
.dark {
    background: #0a0a0a;
}
.user-message {
    background: rgba(0, 255, 136, 0.1) !important;
    border-left: 3px solid #00ff88;
}
.assistant-message {
    background: rgba(0, 255, 255, 0.05) !important;
    border-left: 3px solid #00ffff;
}
.footer {
    text-align: center;
    padding: 20px;
    color: #666;
}
"""

def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Format chat history for the model"""
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages
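
# Example of the structure format_message_history produces:
#   format_message_history([("Hi", "Hello!")], "Be brief")
#   -> [{"role": "system", "content": "Be brief"},
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Hello!"}]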

def stream_response(message: str, history: List[Tuple[str, str]],
                    system_prompt: str, temperature: float, max_tokens: int,
                    top_p: float, provider: str):
    """Generate streaming response from the model"""
    # Format messages for the model
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    # Simulate streaming for demo (replace with actual API call)
    # In production, you'd use the actual provider API here
    demo_response = f"""I'm GPT-OSS-120B running on {provider}!

I received your message: "{message}"

With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...

This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.

The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""

    # Simulate streaming effect
    words = demo_response.split()
    response = ""
    for i in range(0, len(words), 3):
        chunk = " ".join(words[i:i+3])
        response += chunk + " "
        time.sleep(0.05)  # Simulate streaming delay
        yield response.strip()
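
# A minimal sketch of what a live call could look like, in place of the demo
# simulation above. This assumes huggingface_hub >= 0.28 (InferenceClient with
# provider routing) and an HF_TOKEN secret; the provider ids in the UI dropdown
# may need mapping to the ids huggingface_hub expects (e.g. "fireworks-ai").
# Not wired into the UI - swap it in for stream_response to go live.
def stream_response_live(message: str, history: List[Tuple[str, str]],
                         system_prompt: str, temperature: float,
                         max_tokens: int, top_p: float, provider: str):
    """Stream real completions from MODEL_ID via the chosen provider."""
    from huggingface_hub import InferenceClient  # local import: optional dependency

    client = InferenceClient(provider=provider, token=os.environ.get("HF_TOKEN"))
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})
    response = ""
    for chunk in client.chat_completion(
        messages,
        model=MODEL_ID,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content or ""
        response += delta
        yield response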

def clear_chat():
    """Clear the chat history"""
    return None, []

def undo_last(history):
    """Remove the last message from history"""
    if history:
        return history[:-1]
    return history

def retry_last(message, history):
    """Re-queue the last user message so the bot regenerates its reply"""
    if history and history[-1][0]:
        last_message = history[-1][0]
        # Drop the old exchange and append a fresh pending turn so the chained
        # bot_respond call actually regenerates; merely returning the text to
        # the textbox would leave nothing pending for bot_respond to answer.
        return "", history[:-1] + [(last_message, None)]
    return message, history

def load_example(example):
    """Load an example prompt"""
    return example
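
# Hypothetical helper for the "Token Count" box defined further down; it is not
# wired up anywhere yet. A rough whitespace-based estimate, since the demo
# ships no tokenizer - swap in the model's real tokenizer for accurate counts.
def estimate_token_count(text: str) -> str:
    """Very rough token estimate (English averages ~0.75 words per token)."""
    n_words = len(text.split())
    return f"~{int(n_words / 0.75)} tokens"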

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
    # Header
    gr.Markdown(
        """
        # 🧠 GPT-OSS-120B Mega Chat
        ### 120 Billion Parameters of Pure Intelligence 🚀

        Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
        """
    )

    # Main chat interface
    with gr.Row():
        # Chat column
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,
                show_copy_button=True,
                height=500,
                type="tuples"
            )

            # Input area
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
                    lines=3,
                    max_lines=10,
                    scale=5,
                    elem_classes="user-input"
                )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)

            # Action buttons
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", size="sm")
                undo_btn = gr.Button("↩️ Undo", size="sm")
                retry_btn = gr.Button("🔄 Retry", size="sm")

        # Settings column
        with gr.Column(scale=1):
            # Provider selection
            with gr.Accordion("🌐 Inference Provider", open=True):
                provider = gr.Dropdown(
                    label="Provider",
                    choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
                    value=DEFAULT_PROVIDER,
                    info="Choose your inference provider"
                )
                login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm")

            # Model settings
            with gr.Accordion("⚙️ Model Settings", open=True):
                system_mode = gr.Dropdown(
                    label="System Mode",
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value="default",
                    info="Preset system prompts"
                )
                system_prompt = gr.Textbox(
                    label="Custom System Prompt",
                    value=SYSTEM_PROMPTS["default"],
                    lines=3,
                    info="Override with custom instructions"
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.05,
                    info="Higher = more creative, Lower = more focused"
                )
                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=8192,
                    value=2048,
                    step=64,
                    info="Maximum response length"
                )
                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    info="Controls response diversity"
                )
                with gr.Row():
                    seed = gr.Number(
                        label="Seed",
                        value=-1,
                        info="Set for reproducible outputs (-1 for random)"
                    )

            # Advanced settings
            with gr.Accordion("🔬 Advanced", open=False):
                stream_output = gr.Checkbox(
                    label="Stream Output",
                    value=True,
                    info="Show response as it's generated"
                )
                show_reasoning = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False,
                    info="Display chain-of-thought if available"
                )
                reasoning_lang = gr.Dropdown(
                    label="Reasoning Language",
                    choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
                    value="English",
                    info="Language for reasoning process"
                )

            # Model info
            with gr.Accordion("📊 Model Info", open=False):
                gr.Markdown(
                    """
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: 120 Billion
                    - **Architecture**: Transformer + MoE
                    - **Context**: 128K tokens
                    - **Training**: Multi-lingual, code, reasoning
                    - **License**: Open weight

                    **Capabilities**:
                    - Complex reasoning
                    - Code generation
                    - Creative writing
                    - Technical analysis
                    - Multi-lingual support
                    - Function calling
                    """
                )

    # Examples section
    with gr.Accordion("💡 Example Prompts", open=True):
        examples = gr.Examples(
            examples=[
                "Explain quantum computing to a 10-year-old",
                "Write a Python function to detect palindromes with O(1) space complexity",
                "What are the implications of AGI for society?",
                "Create a detailed business plan for a sustainable energy startup",
                "Translate 'Hello, how are you?' to 10 different languages",
                "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
                "Write a haiku about machine learning",
                "Compare and contrast transformers vs RNNs for NLP tasks",
            ],
            inputs=msg,
            label="Click to load an example"
        )

    # Stats and info
    with gr.Row():
        with gr.Column():
            token_count = gr.Textbox(
                label="Token Count",
                value="0 tokens",
                interactive=False,
                scale=1
            )
        with gr.Column():
            response_time = gr.Textbox(
                label="Response Time",
                value="0.0s",
                interactive=False,
                scale=1
            )
        with gr.Column():
            model_status = gr.Textbox(
                label="Status",
                value="🟢 Ready",
                interactive=False,
                scale=1
            )
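
    # Note: these stats boxes are placeholders - nothing updates them yet. The
    # estimate_token_count() sketch above could back "Token Count" once wired up.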

    # Event handlers
    def update_system_prompt(mode):
        return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])

    def user_submit(message, history):
        if not message.strip():
            return "", history
        return "", history + [(message, None)]

    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # Nothing pending to answer: yield rather than return, so this
        # generator always produces at least one update for the Chatbot output
        if not history or history[-1][1] is not None:
            yield history
            return
        message = history[-1][0]

        # Generate response (streaming)
        bot_message = ""
        for chunk in stream_response(
            message,
            history[:-1],
            system_prompt,
            temperature,
            max_tokens,
            top_p,
            provider
        ):
            bot_message = chunk
            history[-1] = (message, bot_message)
            yield history
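
    # Wiring note: submission is a two-step chain - user_submit appends the
    # user turn immediately (queue=False keeps it snappy), then bot_respond
    # streams the assistant turn into the same Chatbot component.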

    # Connect event handlers
    system_mode.change(
        update_system_prompt,
        inputs=[system_mode],
        outputs=[system_prompt]
    )

    # Message submission
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )
    send_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Action buttons
    clear_btn.click(
        lambda: (None, ""),
        outputs=[chatbot, msg],
        queue=False
    )
    undo_btn.click(
        undo_last,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False
    )
    retry_btn.click(
        retry_last,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Login button
    login_btn.click(
        lambda: gr.Info("Please implement HuggingFace OAuth login"),
        queue=False
    )
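
    # A minimal sketch of real sign-in, assuming this runs as a Hugging Face
    # Space with OAuth enabled (hf_oauth: true in the Space README): replace
    # the placeholder button above with gr.LoginButton() and read the profile
    # in any handler, e.g.:
    #
    #     login_btn = gr.LoginButton()
    #
    #     def greet(profile: gr.OAuthProfile | None):
    #         return "Signed out" if profile is None else f"Hi, {profile.username}!"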

    # Footer
    gr.Markdown(
        """
        <div class='footer'>
            <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
            <p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
        </div>
        """
    )

# Launch configuration
if __name__ == "__main__":
    demo.launch()