# codescripts / split.py
# Uploaded by f541119578: "Upload folder using huggingface_hub" (fdf190d, verified)
from tqdm import tqdm
import json
# Read the dataset one JSON document per line and bucket the raw lines by the
# value of each record's "judgename" field. The original text of every line
# (including its trailing newline) is kept so it can be written back verbatim.
#
# The file is only read, so it is opened read-only and closed as soon as the
# lines are in memory.
with open("/home/aiscuser/fhw/data/all_instruct_with_answers_cleaned.json", "r") as f:
    lines = f.readlines()

# One list per judge; these names are consumed by the split-writing code below.
linesa, linesb, linesc, linesd, linese = [], [], [], [], []
buckets = {
    "athene": linesa,
    "deepseekcoder": linesb,
    "llama": linesc,
    "qwen": linesd,
    "qwq": linese,
}

for line in tqdm(lines):
    judge = json.loads(line)["judgename"]
    # Records whose judge is not one of the five known names are skipped.
    # The isinstance check keeps an unhashable value from raising on lookup.
    if isinstance(judge, str) and judge in buckets:
        buckets[judge].append(line)

# Per-judge record counts, reported in the same shape as before.
a, b, c, d, e = len(linesa), len(linesb), len(linesc), len(linesd), len(linese)
print({"athene": a, "deepseekcoder": b, "llama": c, "qwen": d, "qwq": e})
# Write the athene records to six shard files, athene_split_0..5.json.
# Shard i receives linesa[bounds[i]:bounds[i + 1]]; the boundaries are the
# hard-coded ones this script has always used (presumably derived from the
# count printed above -- TODO confirm they still match the data).
athene_bounds = [0, 10747, 21494, 32241, 42988, 53735, 64481]
for idx, (start, stop) in enumerate(zip(athene_bounds, athene_bounds[1:])):
    # `with` closes (and therefore flushes) each shard before the next opens.
    with open(f"/home/aiscuser/fhw/data/athene_split_{idx}.json", "w") as fw:
        fw.writelines(linesa[start:stop])
# Write the deepseekcoder records to six shard files,
# deepseekcoder_split_0..5.json. Shard i receives
# linesb[bounds[i]:bounds[i + 1]]; the boundaries are the hard-coded ones this
# script has always used (presumably derived from the count printed above --
# TODO confirm they still match the data).
#
# Fix: the last three slices used to all be written to
# deepseekcoder_split_3.json, so each overwrote the previous one and
# split_4 / split_5 were never created. The file index now follows the slice.
deepseekcoder_bounds = [0, 11219, 22438, 33658, 44877, 56096, 67316]
for idx, (start, stop) in enumerate(
    zip(deepseekcoder_bounds, deepseekcoder_bounds[1:])
):
    # `with` closes (and therefore flushes) each shard before the next opens.
    with open(f"/home/aiscuser/fhw/data/deepseekcoder_split_{idx}.json", "w") as fw:
        fw.writelines(linesb[start:stop])
# Write the llama records to six shard files, llama_split_0..5.json.
# Shard i receives linesc[bounds[i]:bounds[i + 1]]; the boundaries are the
# hard-coded ones this script has always used (presumably derived from the
# count printed above -- TODO confirm they still match the data).
llama_bounds = [0, 11604, 23208, 34813, 46417, 58021, 69626]
for idx, (start, stop) in enumerate(zip(llama_bounds, llama_bounds[1:])):
    # `with` closes (and therefore flushes) each shard before the next opens.
    with open(f"/home/aiscuser/fhw/data/llama_split_{idx}.json", "w") as fw:
        fw.writelines(linesc[start:stop])
# Write the qwen records to six shard files, qwen_split_0..5.json.
# Shard i receives linesd[bounds[i]:bounds[i + 1]]; the boundaries are the
# hard-coded ones this script has always used (presumably derived from the
# count printed above -- TODO confirm they still match the data).
qwen_bounds = [0, 10607, 21213, 31820, 42427, 53034, 63641]
for idx, (start, stop) in enumerate(zip(qwen_bounds, qwen_bounds[1:])):
    # `with` closes (and therefore flushes) each shard before the next opens.
    with open(f"/home/aiscuser/fhw/data/qwen_split_{idx}.json", "w") as fw:
        fw.writelines(linesd[start:stop])
# Write the qwq records to six shard files, qwq_split_0..5.json.
# Shard i receives linese[bounds[i]:bounds[i + 1]]; the boundaries are the
# hard-coded ones this script has always used (presumably derived from the
# count printed above -- TODO confirm they still match the data).
#
# Fix: these shards used to be sliced from `linesd` (the qwen records), so the
# qwq files contained qwen data and the qwq records were never written out.
# They are now taken from `linese`, the list the qwq records are collected in.
qwq_bounds = [0, 7776, 15552, 23328, 31104, 38880, 46656]
for idx, (start, stop) in enumerate(zip(qwq_bounds, qwq_bounds[1:])):
    # `with` closes (and therefore flushes) each shard before the next opens.
    with open(f"/home/aiscuser/fhw/data/qwq_split_{idx}.json", "w") as fw:
        fw.writelines(linese[start:stop])