DeryFerd committed · Commit 75c7966 · verified · 1 Parent(s): 3e5608c

Update app.py

Files changed (1):
  1. app.py  +31 -18
app.py CHANGED
@@ -1,33 +1,43 @@
-# app.py
+# app.py (CPU version - slow)
 
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # --- CONFIGURATION ---
-# Replace with the HF user/repo where you store the adapter
 ADAPTER_MODEL_ID = "DeryFerd/qwen2-0.5b-taksonomi-finetune"
 BASE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
 
-# --- Load Model & Tokenizer ---
-print("Loading base model...")
+# --- Load Model & Tokenizer for CPU ---
+print("Loading base model onto CPU...")
+# Change 1: load the model without 4-bit optimization, with a standard dtype
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
+    torch_dtype=torch.float32,  # standard dtype for CPU
+    device_map="cpu",  # Change 2: force the model to run on the CPU
 )
 
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
 
+# Change 3: load the LoRA adapter the standard way via PEFT
+from peft import PeftModel
 print("Loading LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
-print("Model ready!")
+print("Model ready on CPU!")
+
+# --- Build a Pipeline for CPU Inference ---
+# Change 4: use the standard, more stable transformers pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=-1,  # -1 means run on the CPU
+)
 
 # --- Inference Function ---
 def get_taxonomy(scientific_name):
-    # Same prompt template as used during training
+    # Same prompt template
     alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -41,13 +51,16 @@ Provide the complete taxonomy for the given scientific name.
 
     prompt = alpaca_prompt.format(scientific_name, "")
 
-    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
-
-    # Decode and clean up the output
-    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response_only = full_text.split("### Response:")[1].strip()
+    # Run the pipeline
+    outputs = pipe(prompt, max_new_tokens=128, do_sample=False)
 
+    # Clean up the output
+    generated_text = outputs[0]['generated_text']
+    try:
+        response_only = generated_text.split("### Response:")[1].strip()
+    except IndexError:
+        response_only = "The model did not produce a valid response."
+
     return response_only
 
 # --- Build the UI with Gradio ---
@@ -55,8 +68,8 @@ iface = gr.Interface(
     fn=get_taxonomy,
     inputs=gr.Textbox(lines=2, label="Scientific Name", placeholder="Example: Orycteropus afer"),
     outputs=gr.Textbox(lines=5, label="Model-Predicted Taxonomy"),
-    title="Animal Taxonomy AI",
-    description="Demo of an LLM (Qwen2-0.5B) fine-tuned to predict animal taxonomy. Enter a scientific name to see the result.",
+    title="Animal Taxonomy AI (CPU Version)",
+    description="Demo of a fine-tuned LLM running on CPU. Please be patient; responses will be slower.",
     allow_flagging="never"
 )
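The visible hunks stop at line 75 of app.py, before any launch call, so a minimal local smoke test might look like the sketch below. It assumes the Space's dependencies (torch, transformers, peft, gradio) are installed and that app.py still ends with the usual iface.launch(); the __main__ guard is illustrative, not part of this commit.

# Illustrative smoke test (not part of this commit): exercise the inference
# path once before serving the UI, so a broken model or adapter load fails fast.
if __name__ == "__main__":
    print(get_taxonomy("Orycteropus afer"))  # the aardvark example from the UI placeholder
    iface.launch()  # assumed launch call; Gradio serves on port 7860 by default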
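Since the new title itself flags the CPU path as slow, one optional follow-up (an assumption, not something this commit does) is to merge the LoRA weights into the base model after loading; PEFT's merge_and_unload() removes the per-forward adapter matmuls, which matters most on CPU.

# Optional speed-up sketch (assumed, not in this commit): fold the adapter
# weights into the base model so CPU generation skips the extra LoRA matmuls.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
model = model.merge_and_unload()  # returns a plain transformers model with merged weights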