Spaces:

arad1367
/

Base-Model-Qwen2.5-3B

Sleeping

App Files Community

arad1367 committed on Jul 29

Commit

8426a0f

verified ·

1 Parent(s): c214a24

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -35

app.py CHANGED Viewed

@@ -3,64 +3,69 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
-# Load model and tokenizer
 model_name = "Qwen/Qwen2.5-3B-Instruct"
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.bfloat16,  # Use bfloat16 to save memory and speed up inference
-    device_map="auto",           # Automatically use GPU if available
-    trust_remote_code=True       # Required for Qwen models
 )
-# Define chat function
 def respond(message, history):
     messages = [{"role": "user", "content": message}]
-    # Apply chat template
-    text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
-    # Tokenize input
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    # Generate response
-    generated_ids = model.generate(
-        **model_inputs,
-        max_new_tokens=512,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9,
-        repetition_penalty=1.1
-    )
-    # Extract only the new tokens
-    generated_ids = generated_ids[0][model_inputs.input_ids.shape[-1]:]
-    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-    return response
-# Create Gradio chat interface
 demo = gr.ChatInterface(
     fn=respond,
-    title="Qwen2.5-3B Chatbot",
-    description="Chat with Qwen2.5-3B-Instruct, a powerful 3-billion-parameter LLM by Alibaba Cloud.",
     examples=[
         "Explain quantum computing in simple terms.",
-        "Write a Python function to calculate Fibonacci numbers.",
-        "Tell me a joke about AI."
     ],
-    retry_btn=None,
-    undo_btn=None,
 )
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 import torch
 import gradio as gr
+# Model ID
 model_name = "Qwen/Qwen2.5-3B-Instruct"
+# Load tokenizer
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Load model with bfloat16 and device_map for efficient GPU usage
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    trust_remote_code=True,
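+    # Optional: use 4-bit quantization to save VRAM (needs the bitsandbytes package and
+    # `from transformers import BitsAndBytesConfig`; only AutoModelForCausalLM/AutoTokenizer are imported above):
+    # quantization_config=BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)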
 )
+# Chatbot function
 def respond(message, history):
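+    # Build a single-turn prompt from the current message only; `history` is not used,
+    # so earlier turns of the conversation are not sent to the model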
     messages = [{"role": "user", "content": message}]
+    prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
+    # Tokenize
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Generate
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    # Decode only the response part
+    full_response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+    return full_response
+# Create Gradio ChatInterface
 demo = gr.ChatInterface(
     fn=respond,
+    title="💬 Qwen2.5-3B-Instruct Chatbot",
+    description="A smart, open-source chatbot powered by Qwen2.5-3B-Instruct. Ask anything!",
     examples=[
         "Explain quantum computing in simple terms.",
+        "Write a Python function to check if a number is prime.",
+        "Solve: 3x + 5 = 17",
+        "Tell me a fun fact about space."
     ],
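+    # NOTE: retry_btn/undo_btn/clear_btn are Gradio 4.x ChatInterface parameters; Gradio 5 removed them,
+    # so either pin gradio<5 for this Space or drop these three arguments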
+    retry_btn=None,      # Hides retry button
+    undo_btn=None,       # Hides undo button
+    clear_btn=None       # Optional: hide clear button too
 )
+# Launch
 if __name__ == "__main__":
     demo.launch()