import gradio as gr
import os
import time
from typing import List, Tuple
# Model and provider configuration
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq"  # Can be changed to fireworks, hyperbolic, etc.

# System prompts for different modes
SYSTEM_PROMPTS = {
    "default": "You are a helpful AI assistant.",
    "creative": "You are a creative and imaginative AI that thinks outside the box.",
    "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
    "concise": "You are a concise AI that provides brief, to-the-point responses.",
    "teacher": "You are a patient teacher who explains concepts clearly with examples.",
    "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}
# CSS for dark theme and custom styling
custom_css = """
#chatbot {
    height: 600px !important;
    background: #0a0a0a;
}
#chatbot .message {
    font-size: 14px;
    line-height: 1.6;
}
.dark {
    background: #0a0a0a;
}
.user-message {
    background: rgba(0, 255, 136, 0.1) !important;
    border-left: 3px solid #00ff88;
}
.assistant-message {
    background: rgba(0, 255, 255, 0.05) !important;
    border-left: 3px solid #00ffff;
}
.footer {
    text-align: center;
    padding: 20px;
    color: #666;
}
"""
def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Format tuple-style chat history into OpenAI-style message dicts."""
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages
def stream_response(message: str, history: List[Tuple[str, str]],
                    system_prompt: str, temperature: float, max_tokens: int,
                    top_p: float, provider: str):
    """Generate a streaming response from the model."""
    # Format messages for the model
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    # Simulated streaming for the demo; a hedged sketch of a real provider
    # call follows this function.
    demo_response = f"""I'm GPT-OSS-120B running on {provider}!

I received your message: "{message}"

With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...

This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.

The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""

    # Simulate the streaming effect, three words at a time
    words = demo_response.split()
    response = ""
    for i in range(0, len(words), 3):
        chunk = " ".join(words[i:i + 3])
        response += chunk + " "
        time.sleep(0.05)  # Simulate streaming delay
        yield response.strip()
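# A minimal sketch of what the real call could look like, not the Space's
# shipped code: it assumes huggingface_hub's InferenceClient (the provider=
# argument needs a recent huggingface_hub release) and that a token is
# available in the HF_TOKEN environment variable. Both are assumptions about
# your deployment.
def stream_via_inference_client(message: str, history: List[Tuple[str, str]],
                                system_prompt: str, temperature: float,
                                max_tokens: int, top_p: float, provider: str):
    """Stream a real chat completion through huggingface_hub (sketch)."""
    from huggingface_hub import InferenceClient

    client = InferenceClient(provider=provider, api_key=os.environ.get("HF_TOKEN"))
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    partial = ""
    for chunk in client.chat_completion(
        messages=messages,
        model=MODEL_ID,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    ):
        # Each streamed chunk carries a delta; accumulate and yield the running text
        partial += chunk.choices[0].delta.content or ""
        yield partial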
def clear_chat():
    """Clear the chat history and the input box."""
    return None, ""

def undo_last(history):
    """Remove the last exchange from history."""
    if history:
        return history[:-1]
    return history

def retry_last(message, history):
    """Re-run the last user message by clearing its response, so the chained
    bot_respond handler regenerates it."""
    if history and history[-1][0]:
        last_message = history[-1][0]
        return "", history[:-1] + [(last_message, None)]
    return message, history
def load_example(example):
    """Load an example prompt (gr.Examples with inputs=msg already does this on click)."""
    return example
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
    # Header
    gr.Markdown(
        """
        # 🧠 GPT-OSS-120B Mega Chat
        ### 120 Billion Parameters of Pure Intelligence 🚀
        Chat with OpenAI's GPT-OSS-120B model, one of the largest open-weight models available!
        """
    )
    # Main chat interface
    with gr.Row():
        # Chat column
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,
                show_copy_button=True,
                height=500,
                type="tuples",  # tuple-style history matches the List[Tuple] handlers above
            )

            # Input area
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
                    lines=3,
                    max_lines=10,
                    scale=5,
                    elem_classes="user-input",
                )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)

            # Action buttons
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", size="sm")
                undo_btn = gr.Button("↩️ Undo", size="sm")
                retry_btn = gr.Button("🔄 Retry", size="sm")
        # Settings column
        with gr.Column(scale=1):
            # Provider selection
            with gr.Accordion("🔌 Inference Provider", open=True):
                provider = gr.Dropdown(
                    label="Provider",
                    choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
                    value=DEFAULT_PROVIDER,
                    info="Choose your inference provider",
                )
                login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm")

            # Model settings
            with gr.Accordion("⚙️ Model Settings", open=True):
                system_mode = gr.Dropdown(
                    label="System Mode",
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value="default",
                    info="Preset system prompts",
                )
                system_prompt = gr.Textbox(
                    label="Custom System Prompt",
                    value=SYSTEM_PROMPTS["default"],
                    lines=3,
                    info="Override with custom instructions",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.05,
                    info="Higher = more creative, lower = more focused",
                )
                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=8192,
                    value=2048,
                    step=64,
                    info="Maximum response length",
                )
                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    info="Controls response diversity",
                )
                with gr.Row():
                    seed = gr.Number(
                        label="Seed",
                        value=-1,
                        info="Set for reproducible outputs (-1 for random)",
                    )

            # Advanced settings (display-only in this demo; a real backend
            # would pass these through to the provider call)
            with gr.Accordion("🔬 Advanced", open=False):
                stream_output = gr.Checkbox(
                    label="Stream Output",
                    value=True,
                    info="Show response as it's generated",
                )
                show_reasoning = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False,
                    info="Display chain-of-thought if available",
                )
                reasoning_lang = gr.Dropdown(
                    label="Reasoning Language",
                    choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
                    value="English",
                    info="Language for reasoning process",
                )
            # Model info
            with gr.Accordion("📊 Model Info", open=False):
                gr.Markdown(
                    """
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: ~117B total (≈5.1B active per token)
                    - **Architecture**: Mixture-of-Experts Transformer
                    - **Context**: 128K tokens
                    - **Training**: Multilingual, code, reasoning
                    - **License**: Apache 2.0 (open weight)

                    **Capabilities**:
                    - Complex reasoning
                    - Code generation
                    - Creative writing
                    - Technical analysis
                    - Multilingual support
                    - Function calling
                    """
                )
    # Examples section
    with gr.Accordion("💡 Example Prompts", open=True):
        examples = gr.Examples(
            examples=[
                "Explain quantum computing to a 10-year-old",
                "Write a Python function to detect palindromes with O(1) space complexity",
                "What are the implications of AGI for society?",
                "Create a detailed business plan for a sustainable energy startup",
                "Translate 'Hello, how are you?' to 10 different languages",
                "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
                "Write a haiku about machine learning",
                "Compare and contrast transformers vs RNNs for NLP tasks",
            ],
            inputs=msg,
            label="Click to load an example",
        )
    # Stats and info (display-only placeholders; see the sketch below for one
    # way to wire the token counter)
    with gr.Row():
        with gr.Column():
            token_count = gr.Textbox(
                label="Token Count",
                value="0 tokens",
                interactive=False,
            )
        with gr.Column():
            response_time = gr.Textbox(
                label="Response Time",
                value="0.0s",
                interactive=False,
            )
        with gr.Column():
            model_status = gr.Textbox(
                label="Status",
                value="🟢 Ready",
                interactive=False,
            )
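    # A rough sketch of wiring the token counter above. The word-split heuristic
    # and the 1.3 multiplier are assumptions, not a real count; an accurate
    # number would need the model's tokenizer.
    def estimate_tokens(history):
        text = " ".join(part for pair in (history or []) for part in pair if part)
        return f"~{int(len(text.split()) * 1.3)} tokens"
    # Chaining .then(estimate_tokens, chatbot, token_count) onto the submit
    # handlers below would keep the counter updated after each response.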
    # Event handlers
    def update_system_prompt(mode):
        return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])

    def user_submit(message, history):
        if not message.strip():
            return "", history
        return "", history + [(message, None)]

    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # This is a generator, so yield (not return) even when there is nothing
        # to do; a bare `return history` would swallow the output.
        if not history or history[-1][1] is not None:
            yield history
            return
        message = history[-1][0]

        # Stream the response, updating the last history entry as chunks arrive
        for chunk in stream_response(
            message,
            history[:-1],
            system_prompt,
            temperature,
            max_tokens,
            top_p,
            provider,
        ):
            history[-1] = (message, chunk)
            yield history
    # Connect event handlers
    system_mode.change(
        update_system_prompt,
        inputs=[system_mode],
        outputs=[system_prompt],
    )

    # Message submission (Enter key and Send button share the same chain)
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )

    send_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )
    # Action buttons
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
        queue=False,
    )

    undo_btn.click(
        undo_last,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False,
    )

    retry_btn.click(
        retry_last,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )

    # Login button (placeholder; real OAuth is noted below)
    login_btn.click(
        lambda: gr.Info("Please implement HuggingFace OAuth login"),
        queue=False,
    )
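    # A hedged alternative to the placeholder above: on Hugging Face Spaces,
    # gr.LoginButton() provides OAuth out of the box (the Space must declare
    # hf_oauth: true in its README metadata) and could replace login_btn, e.g.:
    #     login_btn = gr.LoginButton(size="sm")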
    # Footer
    gr.Markdown(
        """
        <div class='footer'>
        <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
        <p>Remember: this is a 120-billion-parameter model - expect incredible responses!</p>
        </div>
        """
    )

# Launch configuration
if __name__ == "__main__":
    demo.launch()
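# Note: the streaming handlers rely on Gradio's queue. It is enabled by default
# in Gradio 4+; on an older 3.x install you would call demo.queue().launch().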