import time
from typing import List, Tuple

import gradio as gr

# Configure the model and provider
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq" # Can be changed to fireworks, hyperbolic, etc.
# System prompts for different modes
SYSTEM_PROMPTS = {
"default": "You are a helpful AI assistant.",
"creative": "You are a creative and imaginative AI that thinks outside the box.",
"technical": "You are a technical expert AI that provides detailed, accurate technical information.",
"concise": "You are a concise AI that provides brief, to-the-point responses.",
"teacher": "You are a patient teacher who explains concepts clearly with examples.",
"coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}
# CSS for dark theme and custom styling
custom_css = """
#chatbot {
height: 600px !important;
background: #0a0a0a;
}
#chatbot .message {
font-size: 14px;
line-height: 1.6;
}
.dark {
background: #0a0a0a;
}
.user-message {
background: rgba(0, 255, 136, 0.1) !important;
border-left: 3px solid #00ff88;
}
.assistant-message {
background: rgba(0, 255, 255, 0.05) !important;
border-left: 3px solid #00ffff;
}
.footer {
text-align: center;
padding: 20px;
color: #666;
}
"""
def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Convert tuple-style chat history into an OpenAI-style messages list"""
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
for user_msg, assistant_msg in history:
if user_msg:
messages.append({"role": "user", "content": user_msg})
if assistant_msg:
messages.append({"role": "assistant", "content": assistant_msg})
return messages
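
# Example of the shape format_message_history above produces (illustrative only):
#   format_message_history([("Hi", "Hello!")], "Be brief") ->
#   [{"role": "system", "content": "Be brief"},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"}]
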
def stream_response(message: str, history: List[Tuple[str, str]],
system_prompt: str, temperature: float, max_tokens: int,
top_p: float, provider: str):
"""Generate streaming response from the model"""
# Format messages for the model
messages = format_message_history(history, system_prompt)
messages.append({"role": "user", "content": message})
    # Demo only: the response below is simulated rather than generated.
    # In production you would call the selected provider's API here; a
    # hedged sketch of one way to do that follows.
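
    # A minimal sketch of what a real call might look like. This assumes
    # huggingface_hub >= 0.28 (which added provider routing) and an HF_TOKEN
    # environment variable; the library and parameter names are assumptions
    # on top of the original app, not part of it:
    #
    #     import os
    #     from huggingface_hub import InferenceClient
    #
    #     client = InferenceClient(provider=provider, api_key=os.environ["HF_TOKEN"])
    #     stream = client.chat.completions.create(
    #         model=MODEL_ID,
    #         messages=messages,
    #         temperature=temperature,
    #         max_tokens=max_tokens,
    #         top_p=top_p,
    #         stream=True,
    #     )
    #     partial = ""
    #     for chunk in stream:
    #         partial += chunk.choices[0].delta.content or ""
    #         yield partial
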
demo_response = f"""I'm GPT-OSS-120B running on {provider}!
I received your message: "{message}"
With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...
This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.
The model would analyze your input and generate a detailed, thoughtful response drawing on its 120 billion parameters."""
# Simulate streaming effect
words = demo_response.split()
response = ""
for i in range(0, len(words), 3):
chunk = " ".join(words[i:i+3])
response += chunk + " "
time.sleep(0.05) # Simulate streaming delay
yield response.strip()
def clear_chat():
    """Clear the chat history and the message box"""
    return None, ""
def undo_last(history):
"""Remove the last message from history"""
if history:
return history[:-1]
return history
def retry_last(message, history):
    """Drop the last answer and re-queue the last user message for regeneration"""
    if history and history[-1][0]:
        last_message = history[-1][0]
        # Leaving the answer as None lets the chained bot_respond regenerate it
        return "", history[:-1] + [(last_message, None)]
    return message, history
def load_example(example):
"""Load an example prompt"""
return example
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
# Header
gr.Markdown(
"""
# 🧠 GPT-OSS-120B Mega Chat
### 120 Billion Parameters of Pure Intelligence 🚀
Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
"""
)
# Main chat interface
with gr.Row():
# Chat column
with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,  # deprecated in Gradio 5; fine on 4.x
                show_copy_button=True,
                height=600,  # keep in sync with the #chatbot CSS rule above
                type="tuples"  # tuple-style history, matching the handlers below
            )
# Input area
with gr.Row():
msg = gr.Textbox(
label="Message",
placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
lines=3,
max_lines=10,
scale=5,
elem_classes="user-input"
)
with gr.Column(scale=1, min_width=80):
send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)  # not yet wired to cancel generation
# Action buttons
with gr.Row():
clear_btn = gr.Button("🗑️ Clear", size="sm")
undo_btn = gr.Button("↩️ Undo", size="sm")
retry_btn = gr.Button("🔄 Retry", size="sm")
# Settings column
with gr.Column(scale=1):
# Provider selection
with gr.Accordion("🔌 Inference Provider", open=True):
provider = gr.Dropdown(
label="Provider",
choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
value=DEFAULT_PROVIDER,
info="Choose your inference provider"
)
                login_btn = gr.Button("🔐 Sign in with Hugging Face", size="sm")
# Model settings
with gr.Accordion("⚙️ Model Settings", open=True):
system_mode = gr.Dropdown(
label="System Mode",
choices=list(SYSTEM_PROMPTS.keys()),
value="default",
info="Preset system prompts"
)
system_prompt = gr.Textbox(
label="Custom System Prompt",
value=SYSTEM_PROMPTS["default"],
lines=3,
info="Override with custom instructions"
)
temperature = gr.Slider(
label="Temperature",
minimum=0.0,
maximum=2.0,
value=0.7,
step=0.05,
info="Higher = more creative, Lower = more focused"
)
max_tokens = gr.Slider(
label="Max Tokens",
minimum=64,
maximum=8192,
value=2048,
step=64,
info="Maximum response length"
)
top_p = gr.Slider(
label="Top-p (Nucleus Sampling)",
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
info="Controls response diversity"
)
with gr.Row():
seed = gr.Number(
label="Seed",
value=-1,
info="Set for reproducible outputs (-1 for random)"
)
# Advanced settings
with gr.Accordion("🔬 Advanced", open=False):
stream_output = gr.Checkbox(
label="Stream Output",
value=True,
info="Show response as it's generated"
)
show_reasoning = gr.Checkbox(
label="Show Reasoning Process",
value=False,
info="Display chain-of-thought if available"
)
reasoning_lang = gr.Dropdown(
label="Reasoning Language",
choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
value="English",
info="Language for reasoning process"
)
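                # Note: seed, stream_output, show_reasoning, and reasoning_lang
                # are not yet wired into bot_respond; they are placeholders
                # until a real provider call is implemented.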
# Model info
with gr.Accordion("📊 Model Info", open=False):
gr.Markdown(
"""
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: ~117B total (MoE, ~5.1B active per token)
                    - **Architecture**: Mixture-of-Experts Transformer
                    - **Context**: 128K tokens
                    - **Training**: Multi-lingual, code, reasoning
                    - **License**: Apache 2.0 (open weights)
**Capabilities**:
- Complex reasoning
- Code generation
- Creative writing
- Technical analysis
- Multi-lingual support
- Function calling
"""
)
# Examples section
with gr.Accordion("💡 Example Prompts", open=True):
examples = gr.Examples(
examples=[
"Explain quantum computing to a 10-year-old",
"Write a Python function to detect palindromes with O(1) space complexity",
"What are the implications of AGI for society?",
"Create a detailed business plan for a sustainable energy startup",
"Translate 'Hello, how are you?' to 10 different languages",
"Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
"Write a haiku about machine learning",
"Compare and contrast transformers vs RNNs for NLP tasks",
],
inputs=msg,
label="Click to load an example"
)
# Stats and info
with gr.Row():
with gr.Column():
token_count = gr.Textbox(
label="Token Count",
value="0 tokens",
interactive=False,
scale=1
)
with gr.Column():
response_time = gr.Textbox(
label="Response Time",
value="0.0s",
interactive=False,
scale=1
)
with gr.Column():
model_status = gr.Textbox(
label="Status",
value="🟢 Ready",
interactive=False,
scale=1
)
# Event handlers
def update_system_prompt(mode):
return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])
def user_submit(message, history):
if not message.strip():
return "", history
return "", history + [(message, None)]
    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # bot_respond is a generator, so bail out with a yield: a bare
        # `return history` inside a generator would never reach the UI
        if not history or history[-1][1] is not None:
            yield history
            return
message = history[-1][0]
# Generate response (streaming)
bot_message = ""
for chunk in stream_response(
message,
history[:-1],
system_prompt,
temperature,
max_tokens,
top_p,
provider
):
bot_message = chunk
history[-1] = (message, bot_message)
yield history
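
    # The wiring below uses Gradio's two-step streaming pattern: user_submit
    # echoes the user's turn immediately (queue=False keeps it snappy), then
    # .then() chains bot_respond, whose successive yields stream the reply
    # into the Chatbot component.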
# Connect event handlers
system_mode.change(
update_system_prompt,
inputs=[system_mode],
outputs=[system_prompt]
)
# Message submission
msg.submit(
user_submit,
[msg, chatbot],
[msg, chatbot],
queue=False
).then(
bot_respond,
[chatbot, system_prompt, temperature, max_tokens, top_p, provider],
chatbot
)
send_btn.click(
user_submit,
[msg, chatbot],
[msg, chatbot],
queue=False
).then(
bot_respond,
[chatbot, system_prompt, temperature, max_tokens, top_p, provider],
chatbot
)
# Action buttons
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
        queue=False
    )
undo_btn.click(
undo_last,
inputs=[chatbot],
outputs=[chatbot],
queue=False
)
retry_btn.click(
retry_last,
inputs=[msg, chatbot],
outputs=[msg, chatbot],
queue=False
).then(
bot_respond,
[chatbot, system_prompt, temperature, max_tokens, top_p, provider],
chatbot
)
# Login button
    login_btn.click(
        lambda: gr.Info("Hugging Face OAuth login is not implemented yet"),
        queue=False
    )
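
    # A hedged alternative: on Hugging Face Spaces with `hf_oauth: true` set in
    # the Space's README metadata, the stub above could be replaced by Gradio's
    # built-in OAuth component (assumes Gradio 4.x):
    #
    #     login_btn = gr.LoginButton()  # renders "Sign in with Hugging Face"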
# Footer
gr.Markdown(
"""
<div class='footer'>
<p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
<p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
</div>
"""
)
# Launch configuration. On Gradio 4.x the request queue is enabled by default,
# which the generator-based streaming handlers above rely on.
if __name__ == "__main__":
    demo.launch()