# app.py (Versi CPU - Lambat) import torch import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline # --- KONFIGURASI --- ADAPTER_MODEL_ID = "DeryFerd/qwen2-0.5b-taksonomi-finetune" BASE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct" # --- Muat Model & Tokenizer untuk CPU --- print("Loading base model onto CPU...") # Perubahan 1: Memuat model tanpa optimasi 4-bit dan dengan tipe data standar base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_ID, torch_dtype=torch.float32, # Tipe data standar untuk CPU device_map="cpu", # Perubahan 2: Paksa model untuk berjalan di CPU ) print("Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID) # Perubahan 3: Muat adapter LoRA dengan cara standar dari PEFT from peft import PeftModel print("Loading LoRA adapter...") model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID) print("Model ready on CPU!") # --- Buat Pipeline untuk Inferensi di CPU --- # Perubahan 4: Menggunakan pipeline transformers standar yang lebih stabil pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, ) # --- Fungsi untuk Inferensi --- def get_taxonomy(scientific_name): # Template prompt yang sama alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: Provide the complete taxonomy for the given scientific name. ### Input: {} ### Response: {}""" prompt = alpaca_prompt.format(scientific_name, "") # Menjalankan pipeline outputs = pipe(prompt, max_new_tokens=128, do_sample=False) # Membersihkan output generated_text = outputs[0]['generated_text'] try: response_only = generated_text.split("### Response:")[1].strip() except IndexError: response_only = "Model tidak menghasilkan respons yang valid." return response_only # --- Buat UI dengan Gradio --- iface = gr.Interface( fn=get_taxonomy, inputs=gr.Textbox(lines=2, label="Scientific Name", placeholder="Contoh: Orycteropus afer"), outputs=gr.Textbox(lines=5, label="Taksonomi Hasil Prediksi Model"), title="Animal Taxonomy AI (CPU Version)", description="Demo fine-tuning LLM yang berjalan di CPU. Harap bersabar, respons akan lebih lambat.", allow_flagging="never" ) iface.launch()