# codescripts / selectscore.py
# f541119578's picture
# Upload folder using huggingface_hub
# fdf190d verified
# raw
# history blame contribute delete
# 671 Bytes
import re
import json
from tqdm import tqdm
import os
import random
# Filter a scored JSON-lines dataset down to its top-rated records and print
# a histogram of all scores.
#
# Input : one JSON object per line; each object is read for a numeric "score"
#         and a string "instruction".
# Output: every record with 9 <= score <= 10, in shuffled order, one JSON
#         object per line, with the literal "<|start_header_id|>assistant"
#         substring removed from "instruction".
# Stdout: a dict mapping "1".."10" to the number of records whose truncated
#         integer score falls in that bucket (scores outside [1, 10] are not
#         counted).

# Read everything up front so the records can be shuffled; the input is only
# read, so plain "r" mode is enough.
with open("/home/aiscuser/fhw/data/qwq_python_final.json", "r", encoding="utf-8") as f:
    lines = f.readlines()
random.shuffle(lines)

scores = {str(bucket): 0 for bucket in range(1, 11)}

# The context manager guarantees the selected records are flushed and the
# handle is closed even if a malformed line raises part-way through.
with open("/home/aiscuser/fhw/data/qwq_python_selected.json", "w", encoding="utf-8") as fw:
    for line in tqdm(lines):
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        d = json.loads(line)
        score = d["score"]
        if 9 <= score <= 10:
            d["instruction"] = d["instruction"].replace("<|start_header_id|>assistant", "")
            # Serialize the cleaned record rather than echoing the raw input
            # line; otherwise the replacement above never reaches the output.
            # Appending "\n" explicitly also keeps records separated when the
            # input's last line had no trailing newline before shuffling.
            fw.write(json.dumps(d, ensure_ascii=False) + "\n")
        if 1 <= score <= 10:
            # Fractional scores are truncated into their integer bucket.
            scores[str(int(score))] += 1

print(scores)