Spaces:

arad1367
/

Base-Model-Qwen2.5-3B

Sleeping

App Files Files Community

arad1367 committed on Jul 29

Commit

cf8cf08

verified ·

1 Parent(s): 8426a0f

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -22

app.py CHANGED Viewed

@@ -3,34 +3,32 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
-# Model ID
 model_name = "Qwen/Qwen2.5-3B-Instruct"
-# Load tokenizer
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Load model with bfloat16 and device_map for efficient GPU usage
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True,
-    # Optional: use 4-bit quantization to save VRAM
-    # quantization_config=transformers.BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
 )
-# Chatbot function
 def respond(message, history):
-    # Format message with chat template
     messages = [{"role": "user", "content": message}]
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
     # Tokenize
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -42,28 +40,26 @@ def respond(message, history):
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
-    # Decode only the response part
-    full_response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-    return full_response
-# Create Gradio ChatInterface
 demo = gr.ChatInterface(
     fn=respond,
-    title="💬 Qwen2.5-3B-Instruct Chatbot",
-    description="A smart, open-source chatbot powered by Qwen2.5-3B-Instruct. Ask anything!",
     examples=[
         "Explain quantum computing in simple terms.",
         "Write a Python function to check if a number is prime.",
-        "Solve: 3x + 5 = 17",
-        "Tell me a fun fact about space."
     ],
-    # ✅ These are now supported with updated Gradio
-    retry_btn=None,      # Hides retry button
-    undo_btn=None,       # Hides undo button
-    clear_btn=None       # Optional: hide clear button too
 )
 # Launch

 import torch
 import gradio as gr
+# Model identifier
 model_name = "Qwen/Qwen2.5-3B-Instruct"
+# Load tokenizer and model
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True,
 )
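+# Chat function: `history` is accepted to match the gr.ChatInterface callback signature but is ignored, so every message is answered without prior conversation context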
 def respond(message, history):
     messages = [{"role": "user", "content": message}]
+    # Apply chat template
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
     # Tokenize
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
         )
+    # Decode response
+    response = tokenizer.decode(
+        outputs[0][inputs['input_ids'].shape[-1]:],
+        skip_special_tokens=True
+    )
+    return response
+# Gradio interface — retry_btn / undo_btn / clear_btn are intentionally not passed (to avoid Gradio version issues)
 demo = gr.ChatInterface(
     fn=respond,
+    title="Qwen2.5-3B-Instruct Chatbot",
+    description="Ask me anything! I'm a 3B AI assistant by Alibaba Cloud.",
     examples=[
         "Explain quantum computing in simple terms.",
         "Write a Python function to check if a number is prime.",
+        "Solve: 3x + 5 = 17"
     ],
 )
 # Launch