import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# ---------------- CONFIG ----------------
MODEL_NAME = "google/gemma-3-270m-it"  # ✅ instruction-tuned Gemma 3 model

SYSTEM_PROMPT_DEFAULT = (
    "You are a formal and polite AI assistant. "
    "Always respond appropriately depending on the selected explanation style."
)

MAX_NEW_TOKENS_DEFAULT = 256
TEMP_DEFAULT = 0.7
TOP_P_DEFAULT = 0.9

# ---------------- LOAD MODEL ----------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # ✅ safe for CPU
)

generator = pipeline(
    "text-generation",  # ✅ causal LM (not seq2seq)
    model=model,
    tokenizer=tokenizer,
    device=-1,  # ✅ force CPU
)

# ---------------- HELPERS ----------------
def format_prompt(chat_history, user_message, system_message, response_style):
    # Start with the system message
    prompt = system_message + "\n\n"

    # Add only user messages (optional: you can also add the last assistant reply if needed)
    for turn in chat_history:
        if turn["role"] == "user":
            prompt += f"{turn['content']}\n"

    # Add the new user message
    prompt += f"{user_message}\n"

    # Optionally instruct for explanation style
    if response_style == "No explanation":
        prompt += "Answer concisely with no explanation."
    elif response_style == "Short explanation":
        prompt += "Answer briefly with a one-sentence explanation."
    elif response_style == "Detailed explanation":
        prompt += "Answer in detail with reasoning and examples."

    return prompt

# ---------------- CHAT FUNCTION ----------------
def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    chat_history = chat_history or []

    prompt = format_prompt(chat_history, user_message, system_message, response_style)

    output = generator(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )[0]["generated_text"]

    # For causal LMs, the output includes the prompt → strip it
    response = output[len(prompt):].strip()

    # Save user and assistant content without labels
    chat_history.append({"role": "user", "content": user_message})
    chat_history.append({"role": "assistant", "content": response})

    return "", chat_history

# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown("# 🧠 Gemma-3-270M Chat Assistant (CPU-safe)")

    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)

    with gr.Row():
        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
        send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            ["No explanation", "Short explanation", "Detailed explanation"],
            value="Detailed explanation",
            label="Response Style",
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 512, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    send_btn.click(
        chat,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot],
    )
    msg.submit(
        chat,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot],
    )
    clear_btn.click(lambda: [], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()