import json
import os
import random
import re
from typing import Dict, Iterable, TextIO

from tqdm import tqdm
INPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_selected.json"

# Chat-template fragment that is stripped from the instruction of kept records.
ASSISTANT_HEADER = "<|start_header_id|>assistant"


def select_records(
    lines: Iterable[str],
    out: TextIO,
    min_score: float = 9,
    max_score: float = 10,
) -> Dict[str, int]:
    """Write the high-scoring JSONL records to *out* and tally all scores.

    Each non-blank item of *lines* is parsed as one JSON object that must
    carry a ``"score"`` key and, for selected records, an ``"instruction"``
    string.

    Args:
        lines: JSON-encoded records, one per item (trailing newline optional).
        out: Writable text stream receiving the selected records, one JSON
            object per line.
        min_score: Lowest score (inclusive) that is selected.
        max_score: Highest score (inclusive) that is selected.

    Returns:
        A histogram mapping ``"1"`` .. ``"10"`` to the number of records whose
        score, truncated to an integer, falls in that bucket. Scores outside
        the range 1..10 are not counted.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"score"`` (or ``"instruction"`` when it
            is selected).
    """
    scores = {str(bucket): 0 for bucket in range(1, 11)}
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing empty line in the file).
            continue
        record = json.loads(line)
        score = record["score"]
        if min_score <= score <= max_score:
            record["instruction"] = record["instruction"].replace(
                ASSISTANT_HEADER, ""
            )
            # Serialize the *cleaned* record: writing the raw input line would
            # discard the replacement above. Appending an explicit newline also
            # keeps records separated when the source line had none (the last
            # line of a file can end up anywhere after shuffling).
            out.write(json.dumps(record) + "\n")
        if 1 <= score <= 10:
            scores[str(int(score))] += 1
    return scores


def main() -> None:
    """Shuffle the input file, write the selected records, print the tally."""
    with open(INPUT_PATH, encoding="utf-8") as src:
        lines = src.readlines()
    random.shuffle(lines)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as dst:
        scores = select_records(tqdm(lines), dst)
    print(scores)


if __name__ == "__main__":
    main()