# arad1367's picture
# Update app.py
# 28fc680 verified
# app.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
# Model name (Hugging Face Hub repository id)
model_name = "Qwen/Qwen2.5-3B-Instruct"

# Load tokenizer and model once at import time so every chat request reuses
# the same objects instead of reloading the weights.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # load weights in bfloat16 rather than float32
    device_map="auto",  # let the loader choose where to place the weights
    # NOTE: trust_remote_code is deliberately NOT enabled. The tokenizer above
    # already loads without it, and the Qwen2 architecture is implemented in
    # transformers itself, so the flag would only allow code hosted in the Hub
    # repository to be executed on this machine.
)
# Chat function
def _build_messages(message, history):
    """Convert the chat history plus the new message into chat-template messages.

    Two history layouts are accepted: a list of ``[user_text, assistant_text]``
    pairs, or a list of ``{"role": ..., "content": ...}`` dicts. Entries whose
    content is not a non-empty string (for example ``None`` for a reply that
    does not exist yet) are skipped.
    """
    known_roles = ("system", "user", "assistant")
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            entries = [(turn.get("role"), turn.get("content"))]
        else:
            # Pair layout: first item is the user text, second the reply.
            entries = zip(("user", "assistant"), turn)
        for role, content in entries:
            if role in known_roles and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
    # The message being answered always comes last.
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history):
    """Generate the assistant's reply to ``message``.

    Earlier turns from ``history`` are included in the prompt so the model can
    refer back to the conversation instead of answering each message in
    isolation.

    Args:
        message: The user's latest message text.
        history: Previous turns of the conversation as passed by the chat UI;
            may be empty or ``None``.

    Returns:
        The newly generated text, decoded with special tokens removed.
    """
    messages = _build_messages(message, history)
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt tokens followed by the new tokens; slice off
    # the prompt so only the reply is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
# Build the chat UI around `respond`.
# (Original author's note: Gradio 3.50.2 supports ChatInterface fully.)
_example_prompts = [
    "Explain relativity in simple terms.",
    "Write a Python function to reverse a string.",
    "Solve: 2x + 8 = 20",
]

demo = gr.ChatInterface(
    respond,
    title="Qwen2.5-3B Chatbot",
    description="Ask me anything! I'm a smart AI assistant by Alibaba Cloud.",
    examples=_example_prompts,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()