"""Filter a JSON-lines file down to its highest-scored records.

Reads every record from ``INPUT_PATH``, shuffles them, writes the records
whose ``score`` lies in the 9-10 range to ``OUTPUT_PATH`` (with the
``<|start_header_id|>assistant`` marker removed from ``instruction``), and
prints a histogram of all scores between 1 and 10.
"""
import json
import os
import random
import re

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to plain iteration.
    def tqdm(iterable, *args, **kwargs):
        return iterable

INPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_selected.json"
HEADER_MARKER = "<|start_header_id|>assistant"


def select_records(lines, min_score=9, max_score=10):
    """Pick the records to keep and tally the score histogram.

    Args:
        lines: Iterable of strings, each holding one JSON object with a
            numeric ``score`` key (and an ``instruction`` key for the records
            that fall in the selected range). Blank lines are ignored.
        min_score: Lowest score (inclusive) a record needs to be selected.
        max_score: Highest score (inclusive) a record may have to be selected.

    Returns:
        A ``(selected, scores)`` tuple. ``selected`` is a list of
        newline-terminated JSON strings for the kept records, with
        ``HEADER_MARKER`` removed from ``instruction``. ``scores`` maps
        ``"1"`` .. ``"10"`` to the number of records whose score truncates to
        that integer.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``score``, or a selected record lacks
            ``instruction``.
    """
    scores = {str(bucket): 0 for bucket in range(1, 11)}
    selected = []
    for line in lines:
        if not line.strip():
            continue
        record = json.loads(line)
        score = record["score"]
        if min_score <= score <= max_score:
            record["instruction"] = record["instruction"].replace(HEADER_MARKER, "")
            # Serialize the cleaned record instead of echoing the raw line;
            # otherwise the replacement above is silently discarded. Always
            # terminating with "\n" also keeps a shuffled, newline-less last
            # input line from being glued to the record that follows it.
            selected.append(json.dumps(record, ensure_ascii=False) + "\n")
        if 1 <= score <= 10:
            scores[str(int(score))] += 1
    return selected, scores


def main():
    """Run the selection over ``INPUT_PATH`` and write ``OUTPUT_PATH``."""
    with open(INPUT_PATH, "r", encoding="utf-8") as src:
        lines = src.readlines()
    random.shuffle(lines)

    selected, scores = select_records(tqdm(lines))

    # Open the output only after processing succeeded, so a malformed input
    # line does not leave behind a truncated result file.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as dst:
        dst.writelines(selected)
    print(scores)


if __name__ == "__main__":
    main()