import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate with a secret token stored in your Space settings
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Load model and tokenizer
model_name = "google/gemma-3-270m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Chat function: generate a reply and append the (message, reply) pair to history
def chat_with_gemma(history, message):
    inputs = tokenizer.encode(message, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=256,  # cap new tokens, not total length, so long histories still work
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    reply = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    history.append((message, reply))
    return history, ""

# Gradio Chat UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🚀 Chat with Gemma 3 270M")
    gr.Markdown("Professional demo of **Gemma 3 270M**, a lightweight open-weights LLM.")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Type your message...")
    clear = gr.Button("Clear Chat")
    state = gr.State([])

    msg.submit(chat_with_gemma, [state, msg], [chatbot, msg])
    # Reset the stored history as well, not just the visible chat and textbox
    clear.click(lambda: ([], "", []), None, [chatbot, msg, state])

demo.launch()
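
# A minimal requirements.txt sketch for this Space (versions are assumptions;
# pin whatever your environment actually needs):
#
#   gradio
#   torch
#   transformers
#   accelerate        # required for device_map="auto"
#   huggingface_hub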