import argparse
import json
import os

from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument('--judgemodel', type=str, help='judge model whose answer files to filter')
args = parser.parse_args()

# Gather the answer files associated with the given judge model: one file per
# answering model, named "{judgemodel}_answerby_{modelname}.json", with one JSON
# record per line.
names = os.listdir("/home/aiscuser/fhw/data")
all_lines = []
modelnames = []
for name in names:
    if f"{args.judgemodel}_answerby_" not in name:
        continue
    print(name)
    with open(f"/home/aiscuser/fhw/data/{name}", 'r') as f:
        lines = f.readlines()
    modelname = name.replace(".json", "").split("_")[-1]
    print(modelname)
    modelnames.append(modelname)
    all_lines.append(lines)

# For each answering model, record the indices of rows whose response length
# (whitespace-separated word count) falls within [3, 1800]; everything else is
# filtered out. The script assumes exactly five answer files were found above.
t = 0
good0 = []
good1 = []
good2 = []
good3 = []
good4 = []
for line0, line1, line2, line3, line4 in tqdm(zip(all_lines[0], all_lines[1], all_lines[2], all_lines[3], all_lines[4])):
    d0 = json.loads(line0)
    d1 = json.loads(line1)
    d2 = json.loads(line2)
    d3 = json.loads(line3)
    d4 = json.loads(line4)
    len0 = len(d0["response"].split(" "))
    len1 = len(d1["response"].split(" "))
    len2 = len(d2["response"].split(" "))
    len3 = len(d3["response"].split(" "))
    len4 = len(d4["response"].split(" "))
    if 3 <= len0 <= 1800:
        good0.append(t)
    if 3 <= len1 <= 1800:
        good1.append(t)
    if 3 <= len2 <= 1800:
        good2.append(t)
    if 3 <= len3 <= 1800:
        good3.append(t)
    if 3 <= len4 <= 1800:
        good4.append(t)
    t += 1

# Write a single JSON line mapping each answering model to the indices that passed the filter.
with open(f"/home/aiscuser/fhw/data/{args.judgemodel}_filtered_by_answer.json", "w") as fw:
    fw.write(json.dumps({modelnames[0]: good0,
                         modelnames[1]: good1,
                         modelnames[2]: good2,
                         modelnames[3]: good3,
                         modelnames[4]: good4}) + "\n")
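
# Example invocation (hypothetical script and judge-model names; the
# "*_answerby_*.json" files must already exist under /home/aiscuser/fhw/data):
#   python filter_by_answer.py --judgemodel gpt-4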