Update app.py
app.py
CHANGED
@@ -1,33 +1,43 @@
-# app.py
+# app.py (CPU version - slow)
 
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # --- CONFIGURATION ---
-# Replace with the HF user/repo where your adapter is stored
 ADAPTER_MODEL_ID = "DeryFerd/qwen2-0.5b-taksonomi-finetune"
 BASE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
 
-# --- Load model & tokenizer ---
-print("Loading base model...")
+# --- Load model & tokenizer for CPU ---
+print("Loading base model onto CPU...")
+# Change 1: load the model without 4-bit optimization, using a standard dtype
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_ID,
-    torch_dtype=torch.…
-    device_map="…
+    torch_dtype=torch.float32,  # standard data type for CPU
+    device_map="cpu",  # Change 2: force the model to run on the CPU
 )
 
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
 
+# Change 3: load the LoRA adapter the standard way via PEFT
+from peft import PeftModel
 print("Loading LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
-print("Model ready!")
+print("Model ready on CPU!")
+
+# --- Build the inference pipeline for CPU ---
+# Change 4: use the standard, more stable transformers pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=-1,  # -1 means run on the CPU
+)
 
 # --- Inference function ---
 def get_taxonomy(scientific_name):
     # Same prompt template as before
     alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -41,13 +51,16 @@ Provide the complete taxonomy for the given scientific name.
 
     prompt = alpaca_prompt.format(scientific_name, "")
 
-    …
-    outputs = …
-
-    # Decode and clean up the output
-    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response_only = full_text.split("### Response:")[1].strip()
-
+    # Run the pipeline
+    outputs = pipe(prompt, max_new_tokens=128, do_sample=False)
+
+    # Clean up the output
+    generated_text = outputs[0]['generated_text']
+    try:
+        response_only = generated_text.split("### Response:")[1].strip()
+    except IndexError:
+        response_only = "The model did not produce a valid response."
+
     return response_only
 
 # --- Build the UI with Gradio ---
@@ -55,8 +68,8 @@ iface = gr.Interface(
     fn=get_taxonomy,
     inputs=gr.Textbox(lines=2, label="Scientific Name", placeholder="Example: Orycteropus afer"),
     outputs=gr.Textbox(lines=5, label="Model-Predicted Taxonomy"),
-    title="Animal Taxonomy AI",
-    description="Demo fine-tuning LLM …
+    title="Animal Taxonomy AI (CPU Version)",
+    description="An LLM fine-tuning demo running on the CPU. Please be patient; responses will be slower.",
     allow_flagging="never"
 )
 
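The body of alpaca_prompt falls between the first two hunks, so the diff never shows it in full. Judging from its visible first line, the hunk-header context ("Provide the complete taxonomy for the given scientific name."), the two {} slots consumed by alpaca_prompt.format(scientific_name, ""), and the "### Response:" marker the cleanup code splits on, it presumably follows the standard Alpaca layout, roughly:

# Presumed shape of the template; the diff only shows its first line
# and the hunk-header context line, so the middle here is an inference.
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Provide the complete taxonomy for the given scientific name.

### Input:
{}

### Response:
{}"""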
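For anyone reviewing the change, a quick way to sanity-check the CPU path before the Space rebuilds is to call get_taxonomy once from a scratch script. A minimal sketch, assuming this app.py sits in the working directory and that its iface.launch() call (not visible in the diff) is guarded under if __name__ == "__main__": so the import stays headless:

# smoke_test.py - hypothetical local check, not part of this commit.
# Importing app executes the module-level code, so the slow CPU model
# and adapter loading happens once, before the single generation call.
from app import get_taxonomy

print(get_taxonomy("Orycteropus afer"))  # the placeholder example from the UI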
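Since the commit itself labels this version slow, one optional follow-up worth noting (not something this change does) is to merge the LoRA weights into the base model once at startup; PEFT's merge_and_unload() returns a plain transformers model, so each request then skips the adapter indirection:

# Optional CPU speed-up sketch (not in this commit): fold the LoRA deltas
# into the base weights so generation runs a plain Qwen2 forward pass.
from peft import PeftModel

model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
model = model.merge_and_unload()  # returns the merged transformers model

# The merged model feeds the same pipeline(...) call unchanged (device=-1).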