# codescripts / battlescore.py
# f541119578's picture
# Upload folder using huggingface_hub
# fdf190d verified
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import argparse
import json
from tqdm import tqdm
import re
# Explicit score formats, tried in priority order. The first format that occurs
# anywhere in the judgement wins, and its last occurrence supplies the score.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",
        r"\[\[(\d*\.\d+|\d+)\]\]",
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",
    )
)

# A bare integer or decimal number, used by the loose fallback search.
_NUMBER_PATTERN = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Extract the numeric score from a judge's free-text verdict.

    The explicit formats in ``_SCORE_PATTERNS`` (for example ``[[8/10]]``,
    ``**Score: [8]**`` or ``**Score:** 8``) are tried first, in order. If none
    of them occurs, a loose fallback is used:

    * when the text contains the word ``Score``, the first number following
      its last occurrence is taken;
    * otherwise the last number anywhere in the text is taken.

    Args:
        judgement: The judge model's output text.

    Returns:
        The score truncated to an ``int`` (``7.5`` becomes ``7``), or ``-1``
        when no score can be found or ``judgement`` is not a string.
    """
    if not isinstance(judgement, str):
        return -1

    for pattern in _SCORE_PATTERNS:
        matches = pattern.findall(judgement)
        if matches:
            return int(float(matches[-1]))

    # The previous fallback used r"Score(.*?)"; a lazy group at the end of a
    # pattern always captures "", so any text containing "Score" in an
    # unrecognised format yielded -1. Slice the text after the marker instead.
    _, marker, tail = judgement.rpartition("Score")
    if marker:
        # First number after the marker, so "Score: 7/10" reads as 7, not 10.
        first_number = _NUMBER_PATTERN.search(tail)
        return int(float(first_number.group())) if first_number else -1

    numbers = _NUMBER_PATTERN.findall(judgement)
    return int(float(numbers[-1])) if numbers else -1
# Directory holding every input and output file unless --data_dir overrides it.
_DEFAULT_DATA_DIR = "/home/aiscuser/fhw/data"

# A candidate replaces the judge's own answer only when it scores above this.
_MIN_WINNING_SCORE = 6


def _read_lines(path):
    """Return every line of the UTF-8 text file at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.readlines()


def _index_judgements(lines):
    """Parse JSON-lines text into a dict keyed by each record's "index" field.

    Blank lines are skipped. When an index occurs more than once, the first
    record is kept.
    """
    records = {}
    for line in lines:
        if not line.strip():
            continue
        record = json.loads(line)
        records.setdefault(record["index"], record)
    return records


def _pick_best(scorelist, candidates, modelnames, judgename, reference_response):
    """Choose the answer to keep for one instruction.

    Args:
        scorelist: One score per model, ``-1`` where no score is available.
        candidates: The judged record of each model, parallel to ``scorelist``;
            an entry may be ``None`` when the model has no record.
        modelnames: Model names, parallel to ``scorelist``.
        judgename: Name reported when no candidate is good enough.
        reference_response: Answer reported when no candidate is good enough.

    Returns:
        ``(bestname, bestanswer)``. The first model holding the highest score
        wins when that score exceeds ``_MIN_WINNING_SCORE``; otherwise the
        judge's name and reference answer are returned.
    """
    maxscore = max(scorelist, default=-1)
    if maxscore > _MIN_WINNING_SCORE:
        maxindex = scorelist.index(maxscore)
        return modelnames[maxindex], candidates[maxindex]["response"]
    return judgename, reference_response


def main(argv=None):
    """Write, for every filtered instruction, the best-scoring answer.

    Reads from ``--data_dir``:

    * ``{judgename}_filtered_by_answer.json``: its first line is a JSON
      object whose ``judgename`` entry lists the instruction indices to keep;
    * ``{judgename}_answerby_{judgename}.json``: JSON lines, addressed by
      line number using those indices;
    * ``{judgename}_judge_{modelname}.json``: JSON lines of judged records,
      one file per entry of ``--modelnames``.

    Writes ``{judgename}_with_best_answer.json`` as JSON lines.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--judgename', type=str, required=True, help='模型路径')
    parser.add_argument('--modelnames', nargs='+', required=True)
    parser.add_argument('--data_dir', type=str, default=_DEFAULT_DATA_DIR)
    args = parser.parse_args(argv)

    prefix = f"{args.data_dir}/{args.judgename}"

    with open(f"{prefix}_filtered_by_answer.json", "r", encoding="utf-8") as f:
        kept_indices = json.loads(f.readline())[args.judgename]
    reference_lines = _read_lines(f"{prefix}_answerby_{args.judgename}.json")

    # One index -> record lookup per model. This supports any number of models
    # and does not depend on the judge files sharing the order of kept_indices.
    judged = [
        _index_judgements(_read_lines(f"{prefix}_judge_{modelname}.json"))
        for modelname in args.modelnames
    ]

    with open(f"{prefix}_with_best_answer.json", "w", encoding="utf-8") as fw:
        for i in tqdm(kept_indices):
            reference = json.loads(reference_lines[i])
            candidates = [records.get(i) for records in judged]
            # A model with no judged record for this index scores -1.
            scorelist = [
                -1 if record is None else extract_score(record["battle"])
                for record in candidates
            ]
            bestname, bestanswer = _pick_best(
                scorelist,
                candidates,
                args.modelnames,
                args.judgename,
                reference["response"],
            )
            fw.write(json.dumps({"instruction": reference["instruction"], "scorelist": scorelist, "bestname": bestname, "bestanswer": bestanswer, "modelnames": args.modelnames, "judgename": args.judgename})+"\n")


if __name__ == "__main__":
    main()