hhelesto committed
Commit 64eb70f · verified · 1 Parent(s): 995f124

Create app.py

Files changed (1): app.py (+53 −0)
app.py ADDED
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import gradio as gr

# --- Load Model & Tokenizer ---

base_model_name = "unsloth/llama-3.2-3b-bnb-4bit"
adapter_model_name = "aismaanly/ai_synthetic"

# 4-bit NF4 quantization keeps the 3B base model within a small GPU's memory budget.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

print("Loading PEFT adapter...")
model = PeftModel.from_pretrained(model, adapter_model_name)
# Fold the LoRA adapter weights into the base model so generation runs
# without the PEFT wrapper overhead.
model = model.merge_and_unload()
print("Model ready!")

# --- Generation helper ---

def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Decode only the newly generated tokens so the reply does not echo the prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# --- Gradio Interface ---

def chat_fn(message, history):
    # ChatInterface supplies the running chat history, but this simple app
    # only prompts the model with the latest message.
    return generate_text(message)

gr.ChatInterface(
    fn=chat_fn,
    title="LLM Finetuned Comment Generator",
    description="Chat with the model.",
).launch(share=True)
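
Once the app is running, the ChatInterface endpoint can also be called programmatically. Below is a minimal sketch using gradio_client, assuming the file above is deployed as a Hugging Face Space; the Space ID "hhelesto/comment-generator" is a hypothetical placeholder, not taken from this commit.

from gradio_client import Client

# "hhelesto/comment-generator" is a hypothetical Space ID; substitute the real one.
client = Client("hhelesto/comment-generator")
# gr.ChatInterface exposes its endpoint under api_name="/chat".
reply = client.predict("Write a comment about unit tests.", api_name="/chat")
print(reply)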