# app.py (CPU version - slow)
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
# --- CONFIGURATION ---
ADAPTER_MODEL_ID = "DeryFerd/qwen2-0.5b-taksonomi-finetune"
BASE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
# --- Load Model & Tokenizer for CPU ---
print("Loading base model onto CPU...")
# Change 1: Load the model without 4-bit quantization, using a standard dtype
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float32,  # standard dtype for CPU
    device_map="cpu",           # Change 2: force the model to run on the CPU
)
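# Optional tweak (assumption, not part of the original script): passing
# low_cpu_mem_usage=True to from_pretrained can lower peak RAM while loading
# on a memory-constrained CPU Space, e.g.:
# base_model = AutoModelForCausalLM.from_pretrained(
#     BASE_MODEL_ID, torch_dtype=torch.float32, device_map="cpu", low_cpu_mem_usage=True
# )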
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
# Change 3: Load the LoRA adapter the standard way with PEFT
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
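# Optional (assumption, not in the original script): merging the LoRA weights into the
# base model removes the adapter indirection at inference time, which can help a little
# on CPU. PEFT exposes this as merge_and_unload():
# model = model.merge_and_unload()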
print("Model ready on CPU!")
# --- Build the Inference Pipeline on CPU ---
# Change 4: Use the standard transformers pipeline, which is more stable
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
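# Optional sketch (assumption, not part of the original app): on a small CPU Space you
# can cap the number of PyTorch threads to limit CPU usage during generation, e.g.:
# torch.set_num_threads(2)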
# --- Inference Function ---
def get_taxonomy(scientific_name):
    # Same prompt template used during fine-tuning
    alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Provide the complete taxonomy for the given scientific name.
### Input:
{}
### Response:
{}"""
    prompt = alpaca_prompt.format(scientific_name, "")

    # Run the pipeline
    outputs = pipe(prompt, max_new_tokens=128, do_sample=False)

    # Clean up the output: keep only the text after "### Response:"
    generated_text = outputs[0]['generated_text']
    try:
        response_only = generated_text.split("### Response:")[1].strip()
    except IndexError:
        response_only = "The model did not produce a valid response."
    return response_only
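# Example call (input taken from the UI placeholder below; the actual output depends on
# the fine-tuned adapter, so none is shown here):
# print(get_taxonomy("Orycteropus afer"))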
# --- Build the UI with Gradio ---
iface = gr.Interface(
    fn=get_taxonomy,
    inputs=gr.Textbox(lines=2, label="Scientific Name", placeholder="Example: Orycteropus afer"),
    outputs=gr.Textbox(lines=5, label="Predicted Taxonomy"),
    title="Animal Taxonomy AI (CPU Version)",
    description="Demo of a fine-tuned LLM running on a CPU. Please be patient; responses will be slower.",
    allow_flagging="never"
)
iface.launch()