import json
import os
import random

import numpy as np
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Fix all random seeds for reproducible generation
seed = 42
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
print(f"🔒 Seeds fixed (seed={seed})")

# Checkpoint and data paths
fp16_ckpt = "sooh098/midmb-kculture-qa"
test_json_path = "../data/korean_culture_qa_V1.0_test+.json"
test_jsonl_path = "../data/test.jsonl"
output_json_path = "../data/output.json"
use_cuda = torch.cuda.is_available()

# 4-bit NF4 quantization with fp16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
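
# Note (assumption): 4-bit loading goes through the bitsandbytes backend, which
# generally expects a CUDA-capable GPU; on a CPU-only machine the
# quantization_config would likely need to be dropped.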

print("🚀 Loading 4-bit model...")
tokenizer = AutoTokenizer.from_pretrained(
    fp16_ckpt,
    trust_remote_code=True,
    use_fast=True
)

model = AutoModelForCausalLM.from_pretrained(
    fp16_ckpt,
    quantization_config=bnb_config,
    device_map={"": 0},
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    trust_remote_code=True
)
model.eval()
print("✅ 4-bit model loaded")

# Run inference over the test set
predictions = []
with open(test_jsonl_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

for i, line in enumerate(tqdm(lines, desc="🔍 Generating predictions", unit="sample")):
    sample = json.loads(line)
    input_text = sample["input"]
    instruction = sample.get("instruction", "")

    # Build the chat-style prompt with system / user / assistant headers
    prompt = (
        "<|begin_of_text|>\n"
        f"<|start_header_id|>system<|end_header_id|>\n{instruction}\n"
        "<|eot_id|>\n"
        f"<|start_header_id|>user<|end_header_id|>\n{input_text}\n"
        "<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Some tokenizers return token_type_ids, which generate() does not accept
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]

    with torch.no_grad():
        # Re-seed before every call so sampling is reproducible per sample
        torch.manual_seed(seed)
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            repetition_penalty=1.05,
            temperature=0.28,
            top_p=0.85,
            top_k=20,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )

    # Keep only the assistant's reply, trimmed at the end-of-text marker
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)
    raw_response = output_text.split("<|start_header_id|>assistant<|end_header_id|>\n")[-1]
    prediction = raw_response.split("<|end_of_text|>")[0].strip()
    predictions.append(prediction)

    print(f"\n📝 Sample {i + 1}")
    print(f"🤖 Prediction: {prediction}")

# Merge predictions back into the original test file and save
with open(test_json_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)

for i, item in enumerate(test_data):
    answer = predictions[i] if i < len(predictions) else ""
    item["output"] = {"answer": answer}

os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
with open(output_json_path, "w", encoding="utf-8") as f:
    json.dump(test_data, f, ensure_ascii=False, indent=2)

print(f"\n✅ Final results saved to '{output_json_path}'.")