DeryFerd committed · Commit 75c7966 · verified · 1 Parent(s): 3e5608c

Update app.py

Files changed (1):
  1. app.py  +31 -18
app.py CHANGED
@@ -1,33 +1,43 @@
-# app.py
+# app.py (CPU version - slow)
 
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # --- CONFIGURATION ---
-# Replace with the HF user/repo where you store the adapter
 ADAPTER_MODEL_ID = "DeryFerd/qwen2-0.5b-taksonomi-finetune"
 BASE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
 
-# --- Load Model & Tokenizer ---
-print("Loading base model...")
+# --- Load Model & Tokenizer for CPU ---
+print("Loading base model onto CPU...")
+# Change 1: load the model without 4-bit optimization, with a standard dtype
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
+    torch_dtype=torch.float32,  # standard dtype for CPU
+    device_map="cpu",  # Change 2: force the model to run on the CPU
 )
 
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
 
+# Change 3: load the LoRA adapter the standard way via PEFT
+from peft import PeftModel
 print("Loading LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
-print("Model ready!")
+print("Model ready on CPU!")
+
+# --- Build a Pipeline for CPU Inference ---
+# Change 4: use the standard, more stable transformers pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=-1,  # -1 means run on the CPU
+)
 
 # --- Inference Function ---
 def get_taxonomy(scientific_name):
-    # Same prompt template as used during training
+    # Same prompt template
     alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -41,13 +51,16 @@ Provide the complete taxonomy for the given scientific name.
 
     prompt = alpaca_prompt.format(scientific_name, "")
 
-    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
-
-    # Decode and clean up the output
-    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response_only = full_text.split("### Response:")[1].strip()
+    # Run the pipeline
+    outputs = pipe(prompt, max_new_tokens=128, do_sample=False)
 
+    # Clean up the output
+    generated_text = outputs[0]['generated_text']
+    try:
+        response_only = generated_text.split("### Response:")[1].strip()
+    except IndexError:
+        response_only = "The model did not produce a valid response."
+
     return response_only
 
 # --- Build the UI with Gradio ---
@@ -55,8 +68,8 @@ iface = gr.Interface(
     fn=get_taxonomy,
     inputs=gr.Textbox(lines=2, label="Scientific Name", placeholder="Example: Orycteropus afer"),
     outputs=gr.Textbox(lines=5, label="Model-Predicted Taxonomy"),
-    title="Animal Taxonomy AI",
-    description="Demo of an LLM (Qwen2-0.5B) fine-tuned to predict animal taxonomy. Enter a scientific name to see the result.",
+    title="Animal Taxonomy AI (CPU Version)",
+    description="Demo of a fine-tuned LLM running on CPU. Please be patient; responses will be slower.",
     allow_flagging="never"
 )
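The visible hunks stop at line 75 of app.py, before any launch call, so a minimal local smoke test might look like the sketch below. It assumes the Space's dependencies (torch, transformers, peft, gradio) are installed and that app.py still ends with the usual iface.launch(); the __main__ guard is illustrative, not part of this commit.

# Illustrative smoke test (not part of this commit): exercise the inference
# path once before serving the UI, so a broken model or adapter load fails fast.
if __name__ == "__main__":
    print(get_taxonomy("Orycteropus afer"))  # the aardvark example from the UI placeholder
    iface.launch()  # assumed launch call; Gradio serves on port 7860 by default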
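Since the new title itself flags the CPU path as slow, one optional follow-up (an assumption, not something this commit does) is to merge the LoRA weights into the base model after loading; PEFT's merge_and_unload() removes the per-forward adapter matmuls, which matters most on CPU.

# Optional speed-up sketch (assumed, not in this commit): fold the adapter
# weights into the base model so CPU generation skips the extra LoRA matmuls.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
model = model.merge_and_unload()  # returns a plain transformers model with merged weights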