Spaces:

arad1367
/

Base-Model-Qwen2.5-3B

Sleeping

Base-Model-Qwen2.5-3B / app.py

Update app.py

28fc680 verified about 1 month ago

1.58 kB

	# app.py
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch
	import gradio as gr

	# Hugging Face Hub id of the checkpoint this app serves
	model_name = "Qwen/Qwen2.5-3B-Instruct"

	# The tokenizer is fetched first, then the weights; progress is printed to stdout
	print("Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	print("Loading model...")
	# Keyword options forwarded unchanged to from_pretrained.
	# NOTE(review): trust_remote_code=True permits code shipped in the Hub
	# repository to run at load time -- confirm it is actually required.
	_model_load_options = dict(
	    torch_dtype=torch.bfloat16,
	    device_map="auto",
	    trust_remote_code=True,
	)
	model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)

	# Chat helpers
	def _history_to_messages(history):
	    """Convert Gradio chat history into a list of chat-template messages.

	    Two history layouts are accepted:

	    * pair style: ``[[user_text, assistant_text], ...]``
	    * message style: ``[{"role": ..., "content": ...}, ...]``

	    Entries whose text is missing, empty, or not a plain string are
	    skipped so they never reach the chat template.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            role = turn.get("role")
	            content = turn.get("content")
	            if role in ("system", "user", "assistant") and isinstance(content, str) and content:
	                messages.append({"role": role, "content": content})
	        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
	            user_text, assistant_text = turn
	            if isinstance(user_text, str) and user_text:
	                messages.append({"role": "user", "content": user_text})
	            if isinstance(assistant_text, str) and assistant_text:
	                messages.append({"role": "assistant", "content": assistant_text})
	    return messages


	def respond(message, history):
	    """Generate the assistant's reply to ``message``.

	    Earlier turns from ``history`` are included in the prompt so the model
	    can answer follow-up questions with context; previously only the
	    latest message was sent and the conversation had no memory.

	    Args:
	        message: The user's newest message (text).
	        history: Previous turns as supplied by the chat UI; may be empty
	            or ``None``.

	    Returns:
	        The decoded text of the newly generated tokens only (the prompt is
	        sliced off and special tokens are removed).
	    """
	    messages = _history_to_messages(history)
	    messages.append({"role": "user", "content": message})

	    prompt = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    # Gradients are never needed for generation
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=512,
	            temperature=0.7,
	            top_p=0.9,
	            do_sample=True,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # generate() returns prompt + continuation; keep only the continuation
	    prompt_length = inputs["input_ids"].shape[-1]
	    response = tokenizer.decode(
	        outputs[0][prompt_length:],
	        skip_special_tokens=True,
	    )
	    return response

	# Build the chat UI around `respond`
	# Gradio 3.50.2 supports ChatInterface fully
	example_prompts = [
	    "Explain relativity in simple terms.",
	    "Write a Python function to reverse a string.",
	    "Solve: 2x + 8 = 20",
	]
	demo = gr.ChatInterface(
	    fn=respond,
	    title="Qwen2.5-3B Chatbot",
	    description="Ask me anything! I'm a smart AI assistant by Alibaba Cloud.",
	    examples=example_prompts,
	)

	# Start the app only when this file is executed as a script
	if __name__ == "__main__":
	    demo.launch()