# app.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Model name
model_name = "Qwen/Qwen2.5-3B-Instruct"

# Load tokenizer and model
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Chat function: rebuild the full conversation from Gradio's history
# (a list of [user, assistant] pairs in Gradio 3.x) so the model sees
# prior turns instead of only the latest message.
def respond(message, history):
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response

# Create Gradio ChatInterface
# Gradio 3.50.2 supports ChatInterface fully
demo = gr.ChatInterface(
    fn=respond,
    title="Qwen2.5-3B Chatbot",
    description="Ask me anything! I'm a smart AI assistant by Alibaba Cloud.",
    examples=[
        "Explain relativity in simple terms.",
        "Write a Python function to reverse a string.",
        "Solve: 2x + 8 = 20",
    ],
)

# Launch
if __name__ == "__main__":
    demo.launch()
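
# Usage note (a sketch; package versions are assumptions, not pinned by this
# file): install the dependencies and run the script directly. Qwen2.5 uses
# the Qwen2 architecture, which needs a reasonably recent transformers.
#
#   pip install "transformers>=4.37" torch "gradio==3.50.2"
#   python app.py
#
# Gradio serves the chat UI at http://127.0.0.1:7860 by default; call
# demo.launch(share=True) instead if you want a temporary public link.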