arad1367 committed
Commit cf8cf08 · verified · 1 Parent(s): 8426a0f

Update app.py

Files changed (1)
  1. app.py +18 -22
app.py CHANGED
@@ -3,34 +3,32 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
 
-# Model ID
+# Model identifier
 model_name = "Qwen/Qwen2.5-3B-Instruct"
 
-# Load tokenizer
+# Load tokenizer and model
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# Load model with bfloat16 and device_map for efficient GPU usage
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True,
-    # Optional: use 4-bit quantization to save VRAM
-    # quantization_config=transformers.BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
 )
 
-# Chatbot function
+# Chat function (no history used, for simplicity and compatibility)
 def respond(message, history):
-    # Format message with chat template
     messages = [{"role": "user", "content": message}]
+
+    # Apply chat template
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
-
+
     # Tokenize
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
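Note on the quantization hint removed in this hunk: the commented-out line referenced transformers.BitsAndBytesConfig, but the script never imports the transformers module under that name, so uncommenting it verbatim would raise a NameError. If 4-bit loading is reinstated later, a minimal sketch (assuming a CUDA GPU and the bitsandbytes package, neither of which this Space necessarily provides) would be:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization config; compute dtype matches the bfloat16 path above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-3B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

Passing the config object keeps the bfloat16 compute path while storing the weights in 4 bits, which is what the old comment was gesturing at.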
@@ -42,28 +40,26 @@ def respond(message, history):
         temperature=0.7,
         top_p=0.9,
         do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
     )
 
-    # Decode only the response part
-    full_response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-    return full_response
-
-# Create Gradio ChatInterface
+    # Decode response
+    response = tokenizer.decode(
+        outputs[0][inputs['input_ids'].shape[-1]:],
+        skip_special_tokens=True
+    )
+    return response
+
+# Gradio interface: no retry_btn / undo_btn (to avoid version issues)
 demo = gr.ChatInterface(
     fn=respond,
-    title="💬 Qwen2.5-3B-Instruct Chatbot",
-    description="A smart, open-source chatbot powered by Qwen2.5-3B-Instruct. Ask anything!",
+    title="Qwen2.5-3B-Instruct Chatbot",
+    description="Ask me anything! I'm a 3B AI assistant by Alibaba Cloud.",
     examples=[
         "Explain quantum computing in simple terms.",
         "Write a Python function to check if a number is prime.",
-        "Solve: 3x + 5 = 17",
-        "Tell me a fun fact about space."
+        "Solve: 3x + 5 = 17"
     ],
-    # ✅ These are now supported with updated Gradio
-    retry_btn=None,   # Hides retry button
-    undo_btn=None,    # Hides undo button
-    clear_btn=None    # Optional: hide clear button too
 )
 
 # Launch
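The dropped retry_btn / undo_btn / clear_btn kwargs are the substance of this commit: gr.ChatInterface accepts them on some Gradio releases and rejects them with a TypeError on others, so the old comment claiming they "are now supported" only held for particular versions. If hiding those buttons still matters, one defensive sketch (untested against any specific Gradio release) is to catch the TypeError:

import gradio as gr

# respond is the chat function defined above
base_kwargs = dict(fn=respond, title="Qwen2.5-3B-Instruct Chatbot")

try:
    # Older Gradio: these kwargs exist, and None hides the buttons
    demo = gr.ChatInterface(**base_kwargs, retry_btn=None, undo_btn=None, clear_btn=None)
except TypeError:
    # Newer Gradio removed the kwargs; fall back to the defaults
    demo = gr.ChatInterface(**base_kwargs)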
 
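A reviewer's note on the decode refactor: model.generate returns the prompt token IDs followed by the completion in a single tensor, so slicing from inputs['input_ids'].shape[-1] onward is what strips the echoed prompt before decoding. A toy illustration with made-up token IDs:

import torch

prompt_len = 5                                # stands in for inputs["input_ids"].shape[-1]
outputs = torch.arange(12).reshape(1, 12)     # pretend generate() returned 12 token IDs

new_tokens = outputs[0][prompt_len:]          # drop the echoed prompt, keep the completion
assert new_tokens.tolist() == [5, 6, 7, 8, 9, 10, 11]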