import gradio as gr import os from typing import List, Tuple import json import time # Configure the model and provider MODEL_ID = "openai/gpt-oss-120b" DEFAULT_PROVIDER = "groq" # Can be changed to fireworks, hyperbolic, etc. # System prompts for different modes SYSTEM_PROMPTS = { "default": "You are a helpful AI assistant.", "creative": "You are a creative and imaginative AI that thinks outside the box.", "technical": "You are a technical expert AI that provides detailed, accurate technical information.", "concise": "You are a concise AI that provides brief, to-the-point responses.", "teacher": "You are a patient teacher who explains concepts clearly with examples.", "coder": "You are an expert programmer who writes clean, efficient, well-commented code.", } # CSS for dark theme and custom styling custom_css = """ #chatbot { height: 600px !important; background: #0a0a0a; } #chatbot .message { font-size: 14px; line-height: 1.6; } .dark { background: #0a0a0a; } .user-message { background: rgba(0, 255, 136, 0.1) !important; border-left: 3px solid #00ff88; } .assistant-message { background: rgba(0, 255, 255, 0.05) !important; border-left: 3px solid #00ffff; } .footer { text-align: center; padding: 20px; color: #666; } """ def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]: """Format chat history for the model""" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) for user_msg, assistant_msg in history: if user_msg: messages.append({"role": "user", "content": user_msg}) if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg}) return messages def stream_response(message: str, history: List[Tuple[str, str]], system_prompt: str, temperature: float, max_tokens: int, top_p: float, provider: str): """Generate streaming response from the model""" # Format messages for the model messages = format_message_history(history, system_prompt) messages.append({"role": "user", "content": message}) # Simulate streaming for demo (replace with actual API call) # In production, you'd use the actual provider API here demo_response = f"""I'm GPT-OSS-120B running on {provider}! I received your message: "{message}" With these settings: - Temperature: {temperature} - Max tokens: {max_tokens} - Top-p: {top_p} - System prompt: {system_prompt[:50]}... This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model. The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge.""" # Simulate streaming effect words = demo_response.split() response = "" for i in range(0, len(words), 3): chunk = " ".join(words[i:i+3]) response += chunk + " " time.sleep(0.05) # Simulate streaming delay yield response.strip() def clear_chat(): """Clear the chat history""" return None, [] def undo_last(history): """Remove the last message from history""" if history: return history[:-1] return history def retry_last(message, history): """Retry the last message""" if history and history[-1][0]: last_message = history[-1][0] return last_message, history[:-1] return message, history def load_example(example): """Load an example prompt""" return example # Create the Gradio interface with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo: # Header gr.Markdown( """ # 🧠 GPT-OSS-120B Mega Chat ### 120 Billion Parameters of Pure Intelligence 🚀 Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available! """ ) # Main chat interface with gr.Row(): # Chat column with gr.Column(scale=3): chatbot = gr.Chatbot( label="Chat", elem_id="chatbot", bubble_full_width=False, show_copy_button=True, height=500, type="tuples" ) # Input area with gr.Row(): msg = gr.Textbox( label="Message", placeholder="Ask anything... (Shift+Enter for new line, Enter to send)", lines=3, max_lines=10, scale=5, elem_classes="user-input" ) with gr.Column(scale=1, min_width=80): send_btn = gr.Button("Send 📤", variant="primary", size="lg") stop_btn = gr.Button("Stop âšī¸", variant="stop", size="lg", visible=False) # Action buttons with gr.Row(): clear_btn = gr.Button("đŸ—‘ī¸ Clear", size="sm") undo_btn = gr.Button("â†Šī¸ Undo", size="sm") retry_btn = gr.Button("🔄 Retry", size="sm") # Settings column with gr.Column(scale=1): # Provider selection with gr.Accordion("🔌 Inference Provider", open=True): provider = gr.Dropdown( label="Provider", choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"], value=DEFAULT_PROVIDER, info="Choose your inference provider" ) login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm") # Model settings with gr.Accordion("âš™ī¸ Model Settings", open=True): system_mode = gr.Dropdown( label="System Mode", choices=list(SYSTEM_PROMPTS.keys()), value="default", info="Preset system prompts" ) system_prompt = gr.Textbox( label="Custom System Prompt", value=SYSTEM_PROMPTS["default"], lines=3, info="Override with custom instructions" ) temperature = gr.Slider( label="Temperature", minimum=0.0, maximum=2.0, value=0.7, step=0.05, info="Higher = more creative, Lower = more focused" ) max_tokens = gr.Slider( label="Max Tokens", minimum=64, maximum=8192, value=2048, step=64, info="Maximum response length" ) top_p = gr.Slider( label="Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.95, step=0.05, info="Controls response diversity" ) with gr.Row(): seed = gr.Number( label="Seed", value=-1, info="Set for reproducible outputs (-1 for random)" ) # Advanced settings with gr.Accordion("đŸ”Ŧ Advanced", open=False): stream_output = gr.Checkbox( label="Stream Output", value=True, info="Show response as it's generated" ) show_reasoning = gr.Checkbox( label="Show Reasoning Process", value=False, info="Display chain-of-thought if available" ) reasoning_lang = gr.Dropdown( label="Reasoning Language", choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"], value="English", info="Language for reasoning process" ) # Model info with gr.Accordion("📊 Model Info", open=False): gr.Markdown( """ **Model**: openai/gpt-oss-120b - **Parameters**: 120 Billion - **Architecture**: Transformer + MoE - **Context**: 128K tokens - **Training**: Multi-lingual, code, reasoning - **License**: Open weight **Capabilities**: - Complex reasoning - Code generation - Creative writing - Technical analysis - Multi-lingual support - Function calling """ ) # Examples section with gr.Accordion("💡 Example Prompts", open=True): examples = gr.Examples( examples=[ "Explain quantum computing to a 10-year-old", "Write a Python function to detect palindromes with O(1) space complexity", "What are the implications of AGI for society?", "Create a detailed business plan for a sustainable energy startup", "Translate 'Hello, how are you?' to 10 different languages", "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`", "Write a haiku about machine learning", "Compare and contrast transformers vs RNNs for NLP tasks", ], inputs=msg, label="Click to load an example" ) # Stats and info with gr.Row(): with gr.Column(): token_count = gr.Textbox( label="Token Count", value="0 tokens", interactive=False, scale=1 ) with gr.Column(): response_time = gr.Textbox( label="Response Time", value="0.0s", interactive=False, scale=1 ) with gr.Column(): model_status = gr.Textbox( label="Status", value="đŸŸĸ Ready", interactive=False, scale=1 ) # Event handlers def update_system_prompt(mode): return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"]) def user_submit(message, history): if not message.strip(): return "", history return "", history + [(message, None)] def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider): if not history or history[-1][1] is not None: return history message = history[-1][0] # Generate response (streaming) bot_message = "" for chunk in stream_response( message, history[:-1], system_prompt, temperature, max_tokens, top_p, provider ): bot_message = chunk history[-1] = (message, bot_message) yield history # Connect event handlers system_mode.change( update_system_prompt, inputs=[system_mode], outputs=[system_prompt] ) # Message submission msg.submit( user_submit, [msg, chatbot], [msg, chatbot], queue=False ).then( bot_respond, [chatbot, system_prompt, temperature, max_tokens, top_p, provider], chatbot ) send_btn.click( user_submit, [msg, chatbot], [msg, chatbot], queue=False ).then( bot_respond, [chatbot, system_prompt, temperature, max_tokens, top_p, provider], chatbot ) # Action buttons clear_btn.click( lambda: (None, ""), outputs=[chatbot, msg], queue=False ) undo_btn.click( undo_last, inputs=[chatbot], outputs=[chatbot], queue=False ) retry_btn.click( retry_last, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False ).then( bot_respond, [chatbot, system_prompt, temperature, max_tokens, top_p, provider], chatbot ) # Login button login_btn.click( lambda: gr.Info("Please implement HuggingFace OAuth login"), queue=False ) # Footer gr.Markdown( """ """ ) # Launch configuration if __name__ == "__main__": demo.launch()