# codescripts/vllmarenaans.py
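"""Generate answers with a vLLM-loaded model over one split of judge prompts.

Reads `{judge}_split_{split}.json` (JSONL: one {"instruction": ...} object per
line), generates one response per instruction, and writes each record back out
with an added "response" field to `{judge}_split_{split}_answerby_{model}.json`.
"""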
import argparse
import json

from tqdm import tqdm
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, help='Name of the model checkpoint under the local model_weights directory')
parser.add_argument('--judge', type=str, help='Name of the judge prompt dataset')
parser.add_argument('--split', type=str, help='Index of the data split to answer')
args = parser.parse_args()
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(f"/home/aiscuser/fhw/model_weights/{args.model}", trust_remote_code=True)
# Load the model into vLLM; the name-or-path also accepts GPTQ or AWQ quantized checkpoints.
llm = LLM(f"/home/aiscuser/fhw/model_weights/{args.model}", dtype='float16', tensor_parallel_size=8, trust_remote_code=True, enforce_eager=True, max_model_len=8192)
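# Sampling settings for answer generation. Note the prompt and the generated
# tokens together must fit within the 8192-token max_model_len set above.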
sampling_params = SamplingParams(temperature=1.0, top_p=0.95, max_tokens=8192)
# Prepare the prompts: the input file is JSONL, one JSON object per line.
f = open(f"/home/aiscuser/fhw/data/{args.judge}_split_{args.split}.json", 'r')
lines = f.readlines()
f.close()
fw = open(f"/home/aiscuser/fhw/data/{args.judge}_split_{args.split}_answerby_{args.model}.json", 'w')
prompts = []
for line in tqdm(lines):
    d = json.loads(line)
    instruction = d["instruction"]
    messages = [{"role": "user", "content": instruction}]
    # Render the chat template to plain text; add_generation_prompt appends
    # the assistant-turn header so the model continues with an answer.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    prompts.append(text)
# vLLM batches and schedules all prompts in a single generate() call;
# outputs come back in the same order as the prompts.
outputs = llm.generate(prompts=prompts, sampling_params=sampling_params)
for line, output in zip(lines, outputs):
    d = json.loads(line)
    d["response"] = output.outputs[0].text
    fw.write(json.dumps(d) + "\n")
fw.close()
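
# Example invocation (the model and dataset names below are placeholders,
# not values taken from this repo):
#   python vllmarenaans.py --model my-chat-model --judge my-judge-set --split 0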