import gradio as gr
import os
from typing import List, Tuple
import json
import time

# Configure the model and provider
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq"  # Can be changed to fireworks, hyperbolic, etc.

# System prompts for different modes
SYSTEM_PROMPTS = {
    "default": "You are a helpful AI assistant.",
    "creative": "You are a creative and imaginative AI that thinks outside the box.",
    "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
    "concise": "You are a concise AI that provides brief, to-the-point responses.",
    "teacher": "You are a patient teacher who explains concepts clearly with examples.",
    "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}

# CSS for dark theme and custom styling
custom_css = """
#chatbot {
    height: 600px !important;
    background: #0a0a0a;
}
#chatbot .message {
    font-size: 14px;
    line-height: 1.6;
}
.dark {
    background: #0a0a0a;
}
.user-message {
    background: rgba(0, 255, 136, 0.1) !important;
    border-left: 3px solid #00ff88;
}
.assistant-message {
    background: rgba(0, 255, 255, 0.05) !important;
    border-left: 3px solid #00ffff;
}
.footer {
    text-align: center;
    padding: 20px;
    color: #666;
}
"""

def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Format chat history for the model"""
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages
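
# Example of the structure format_message_history produces:
#   format_message_history([("Hi", "Hello!")], "Be brief")
#   -> [{"role": "system", "content": "Be brief"},
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Hello!"}]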

def stream_response(message: str, history: List[Tuple[str, str]],
                    system_prompt: str, temperature: float, max_tokens: int,
                    top_p: float, provider: str):
    """Generate streaming response from the model"""
    # Format messages for the model
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    # Simulate streaming for demo (replace with actual API call)
    # In production, you'd use the actual provider API here
    demo_response = f"""I'm GPT-OSS-120B running on {provider}!

I received your message: "{message}"

With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...

This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.

The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""

    # Simulate streaming effect
    words = demo_response.split()
    response = ""
    for i in range(0, len(words), 3):
        chunk = " ".join(words[i:i+3])
        response += chunk + " "
        time.sleep(0.05)  # Simulate streaming delay
        yield response.strip()
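
# A minimal sketch of what a live call could look like, in place of the demo
# simulation above. This assumes huggingface_hub >= 0.28 (InferenceClient with
# provider routing) and an HF_TOKEN secret; the provider ids in the UI dropdown
# may need mapping to the ids huggingface_hub expects (e.g. "fireworks-ai").
# Not wired into the UI - swap it in for stream_response to go live.
def stream_response_live(message: str, history: List[Tuple[str, str]],
                         system_prompt: str, temperature: float,
                         max_tokens: int, top_p: float, provider: str):
    """Stream real completions from MODEL_ID via the chosen provider."""
    from huggingface_hub import InferenceClient  # local import: optional dependency

    client = InferenceClient(provider=provider, token=os.environ.get("HF_TOKEN"))
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})
    response = ""
    for chunk in client.chat_completion(
        messages,
        model=MODEL_ID,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content or ""
        response += delta
        yield response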

def clear_chat():
    """Clear the chat history"""
    return None, []

def undo_last(history):
    """Remove the last message from history"""
    if history:
        return history[:-1]
    return history

def retry_last(message, history):
    """Re-queue the last user message so the bot regenerates its reply"""
    if history and history[-1][0]:
        last_message = history[-1][0]
        # Drop the old exchange and append a fresh pending turn so the chained
        # bot_respond call actually regenerates; merely returning the text to
        # the textbox would leave nothing pending for bot_respond to answer.
        return "", history[:-1] + [(last_message, None)]
    return message, history

def load_example(example):
    """Load an example prompt"""
    return example
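
# Hypothetical helper for the "Token Count" box defined further down; it is not
# wired up anywhere yet. A rough whitespace-based estimate, since the demo
# ships no tokenizer - swap in the model's real tokenizer for accurate counts.
def estimate_token_count(text: str) -> str:
    """Very rough token estimate (English averages ~0.75 words per token)."""
    n_words = len(text.split())
    return f"~{int(n_words / 0.75)} tokens"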

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
    # Header
    gr.Markdown(
        """
        # 🧠 GPT-OSS-120B Mega Chat
        ### 120 Billion Parameters of Pure Intelligence 🚀

        Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
        """
    )

    # Main chat interface
    with gr.Row():
        # Chat column
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,
                show_copy_button=True,
                height=500,
                type="tuples"
            )

            # Input area
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
                    lines=3,
                    max_lines=10,
                    scale=5,
                    elem_classes="user-input"
                )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)

            # Action buttons
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", size="sm")
                undo_btn = gr.Button("↩️ Undo", size="sm")
                retry_btn = gr.Button("🔄 Retry", size="sm")

        # Settings column
        with gr.Column(scale=1):
            # Provider selection
            with gr.Accordion("🌐 Inference Provider", open=True):
                provider = gr.Dropdown(
                    label="Provider",
                    choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
                    value=DEFAULT_PROVIDER,
                    info="Choose your inference provider"
                )
                login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm")

            # Model settings
            with gr.Accordion("⚙️ Model Settings", open=True):
                system_mode = gr.Dropdown(
                    label="System Mode",
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value="default",
                    info="Preset system prompts"
                )
                system_prompt = gr.Textbox(
                    label="Custom System Prompt",
                    value=SYSTEM_PROMPTS["default"],
                    lines=3,
                    info="Override with custom instructions"
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.05,
                    info="Higher = more creative, Lower = more focused"
                )
                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=8192,
                    value=2048,
                    step=64,
                    info="Maximum response length"
                )
                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    info="Controls response diversity"
                )
                with gr.Row():
                    seed = gr.Number(
                        label="Seed",
                        value=-1,
                        info="Set for reproducible outputs (-1 for random)"
                    )

            # Advanced settings
            with gr.Accordion("🔬 Advanced", open=False):
                stream_output = gr.Checkbox(
                    label="Stream Output",
                    value=True,
                    info="Show response as it's generated"
                )
                show_reasoning = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False,
                    info="Display chain-of-thought if available"
                )
                reasoning_lang = gr.Dropdown(
                    label="Reasoning Language",
                    choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
                    value="English",
                    info="Language for reasoning process"
                )

            # Model info
            with gr.Accordion("📊 Model Info", open=False):
                gr.Markdown(
                    """
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: 120 Billion
                    - **Architecture**: Transformer + MoE
                    - **Context**: 128K tokens
                    - **Training**: Multi-lingual, code, reasoning
                    - **License**: Open weight

                    **Capabilities**:
                    - Complex reasoning
                    - Code generation
                    - Creative writing
                    - Technical analysis
                    - Multi-lingual support
                    - Function calling
                    """
                )

    # Examples section
    with gr.Accordion("💡 Example Prompts", open=True):
        examples = gr.Examples(
            examples=[
                "Explain quantum computing to a 10-year-old",
                "Write a Python function to detect palindromes with O(1) space complexity",
                "What are the implications of AGI for society?",
                "Create a detailed business plan for a sustainable energy startup",
                "Translate 'Hello, how are you?' to 10 different languages",
                "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
                "Write a haiku about machine learning",
                "Compare and contrast transformers vs RNNs for NLP tasks",
            ],
            inputs=msg,
            label="Click to load an example"
        )

    # Stats and info
    with gr.Row():
        with gr.Column():
            token_count = gr.Textbox(
                label="Token Count",
                value="0 tokens",
                interactive=False,
                scale=1
            )
        with gr.Column():
            response_time = gr.Textbox(
                label="Response Time",
                value="0.0s",
                interactive=False,
                scale=1
            )
        with gr.Column():
            model_status = gr.Textbox(
                label="Status",
                value="🟢 Ready",
                interactive=False,
                scale=1
            )
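
    # Note: these stats boxes are placeholders - nothing updates them yet. The
    # estimate_token_count() sketch above could back "Token Count" once wired up.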

    # Event handlers
    def update_system_prompt(mode):
        return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])

    def user_submit(message, history):
        if not message.strip():
            return "", history
        return "", history + [(message, None)]

    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # Nothing pending to answer: yield rather than return, so this
        # generator always produces at least one update for the Chatbot output
        if not history or history[-1][1] is not None:
            yield history
            return
        message = history[-1][0]

        # Generate response (streaming)
        bot_message = ""
        for chunk in stream_response(
            message,
            history[:-1],
            system_prompt,
            temperature,
            max_tokens,
            top_p,
            provider
        ):
            bot_message = chunk
            history[-1] = (message, bot_message)
            yield history
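
    # Wiring note: submission is a two-step chain - user_submit appends the
    # user turn immediately (queue=False keeps it snappy), then bot_respond
    # streams the assistant turn into the same Chatbot component.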

    # Connect event handlers
    system_mode.change(
        update_system_prompt,
        inputs=[system_mode],
        outputs=[system_prompt]
    )

    # Message submission
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )
    send_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Action buttons
    clear_btn.click(
        lambda: (None, ""),
        outputs=[chatbot, msg],
        queue=False
    )
    undo_btn.click(
        undo_last,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False
    )
    retry_btn.click(
        retry_last,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Login button
    login_btn.click(
        lambda: gr.Info("Please implement HuggingFace OAuth login"),
        queue=False
    )
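
    # A minimal sketch of real sign-in, assuming this runs as a Hugging Face
    # Space with OAuth enabled (hf_oauth: true in the Space README): replace
    # the placeholder button above with gr.LoginButton() and read the profile
    # in any handler, e.g.:
    #
    #     login_btn = gr.LoginButton()
    #
    #     def greet(profile: gr.OAuthProfile | None):
    #         return "Signed out" if profile is None else f"Hi, {profile.username}!"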

    # Footer
    gr.Markdown(
        """
        <div class='footer'>
            <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
            <p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
        </div>
        """
    )

# Launch configuration
if __name__ == "__main__":
    demo.launch()