|
from transformers import AutoTokenizer |
|
from vllm import LLM, SamplingParams |
|
import argparse |
|
import json |
|
from tqdm import tqdm |
|
import re |
|
# Explicit score markers a judge may emit, tried in this order. The first
# pattern that matches anywhere in the verdict wins; more specific forms
# (double brackets, "/10" suffix) come before their looser variants.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",               # [[8/10]]
        r"\[\[(\d*\.\d+|\d+)\]\]",                  # [[8]]
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",    # **Score: [8/10]**
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",       # **Score: [8]**
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",        # **Score: 8/10**
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",           # **Score: 8**
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",        # **Score:** 8/10
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",           # **Score:** 8
    )
)

# Loose fallback: the first number that follows a "Score" marker. \D also
# matches newlines, so the number may sit on a later line than the marker.
_SCORE_AFTER_MARKER = re.compile(r"Score\D*?(\d*\.\d+|\d+)")

# Last-resort fallback: any integer or decimal literal.
_ANY_NUMBER = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Extract the numeric score from a judge's free-text verdict.

    Lookup order:

    1. The explicit formats in ``_SCORE_PATTERNS``, in order. For the first
       format that occurs at all, the *last* occurrence in the text is used.
    2. Otherwise, if the text contains ``Score``, the first number following
       a ``Score`` marker (again using the last such marker/number pair).
    3. Otherwise (no ``Score`` anywhere), the last number in the text.

    Args:
        judgement: The judge model's output text.

    Returns:
        The score truncated toward zero to an ``int`` (``"7.9"`` -> ``7``),
        or ``-1`` when no score can be found or ``judgement`` is not a string.
    """
    if not isinstance(judgement, str):
        return -1

    for pattern in _SCORE_PATTERNS:
        hits = pattern.findall(judgement)
        if hits:
            return int(float(hits[-1]))

    # The previous fallback used r"Score(.*?)", whose trailing lazy group
    # always captures "", so verdicts such as "Final Score: 8/10" were
    # reported as -1. Capture the number after the marker directly instead.
    hits = _SCORE_AFTER_MARKER.findall(judgement)
    if hits:
        return int(float(hits[-1]))

    # A "Score" marker with no number after it stays unscored, as before.
    if "Score" in judgement:
        return -1

    hits = _ANY_NUMBER.findall(judgement)
    if hits:
        return int(float(hits[-1]))

    return -1
|
# A candidate answer replaces the judge model's own answer only when its
# extracted score is strictly greater than this value.
SCORE_THRESHOLD = 6

# Directory holding every input and output file unless --datadir overrides it.
DEFAULT_DATA_DIR = "/home/aiscuser/fhw/data"


def main():
    """Pick, per instruction, the best answer among the judged candidates.

    Inputs (all under the data directory, named after ``--judgename``):

    * ``{judge}_filtered_by_answer.json`` - its first line is a JSON object
      whose ``judge`` entry lists the instruction indices to process.
    * ``{judge}_answerby_{judge}.json`` - JSON lines; line ``i`` supplies the
      ``instruction`` and the judge model's own ``response`` for index ``i``.
    * ``{judge}_judge_{model}.json`` - one per ``--modelnames`` entry, JSON
      lines with ``index``, ``battle`` (judge verdict text) and ``response``.
      Each file is walked with a forward-only cursor, so its records are
      assumed to appear in the same order as the filtered index list.

    Output: ``{judge}_with_best_answer.json``, one JSON object per processed
    index. The winning candidate is the first one with the highest score; if
    that score does not exceed ``SCORE_THRESHOLD`` the judge model's own
    answer is kept instead.
    """
    parser = argparse.ArgumentParser()
    # The help text below is the original one (it reads "model path").
    parser.add_argument('--judgename', type=str, required=True, help='模型路径')
    parser.add_argument('--modelnames', nargs='+', required=True)
    parser.add_argument('--datadir', type=str, default=DEFAULT_DATA_DIR,
                        help='directory containing the input/output JSON files')
    args = parser.parse_args()

    judge = args.judgename
    data_dir = args.datadir

    with open(f"{data_dir}/{judge}_filtered_by_answer.json", "r", encoding="utf-8") as fh:
        kept_indices = json.loads(fh.readline())[judge]

    # Kept as raw lines because they are addressed by position (line i <-> index i).
    with open(f"{data_dir}/{judge}_answerby_{judge}.json", "r", encoding="utf-8") as fh:
        reference_lines = fh.readlines()

    judged_lines = []
    for modelname in args.modelnames:
        with open(f"{data_dir}/{judge}_judge_{modelname}.json", "r", encoding="utf-8") as fh:
            judged_lines.append(fh.readlines())

    # One read position per candidate model; it advances only when the record
    # under it belongs to the index currently being processed.
    cursors = [0] * len(judged_lines)

    with open(f"{data_dir}/{judge}_with_best_answer.json", "w", encoding="utf-8") as out:
        for idx in tqdm(kept_indices):
            reference = json.loads(reference_lines[idx])

            scorelist = []
            candidates = []
            for k, lines in enumerate(judged_lines):
                record = None
                if cursors[k] < len(lines):
                    record = json.loads(lines[cursors[k]])
                if record is not None and record["index"] == idx:
                    scorelist.append(extract_score(record["battle"]))
                    cursors[k] += 1
                else:
                    # This model has no judgement for idx (or its file is
                    # exhausted): mark it as unscored.
                    record = None
                    scorelist.append(-1)
                candidates.append(record)

            maxscore = max(scorelist)
            maxindex = scorelist.index(maxscore)  # ties go to the earliest model

            if maxscore > SCORE_THRESHOLD:
                bestname = args.modelnames[maxindex]
                bestanswer = candidates[maxindex]["response"]
            else:
                bestname = judge
                bestanswer = reference["response"]

            out.write(json.dumps({"instruction": reference["instruction"], "scorelist": scorelist, "bestname": bestname, "bestanswer": bestanswer, "modelnames": args.modelnames, "judgename": judge}) + "\n")


if __name__ == "__main__":
    main()
|
|
|
|
|
|