File size: 671 Bytes
fdf190d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import re
import json
from tqdm import tqdm
import os
import random
# Source JSON-lines file and destination for the selected records.
INPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_selected.json"

# Marker removed from the "instruction" field of every selected record.
ASSISTANT_HEADER = "<|start_header_id|>assistant"


def select_records(lines, min_score=9, max_score=10):
    """Filter JSON-lines records by score and tally the score distribution.

    Args:
        lines: Iterable of strings, each holding one JSON object with at
            least a numeric "score" key; selected records must also have a
            string "instruction" key. Blank lines are skipped.
        min_score: Lowest score (inclusive) a record needs to be selected.
        max_score: Highest score (inclusive) a record may have to be selected.

    Returns:
        A ``(selected, scores)`` tuple. ``selected`` is a list of
        newline-terminated JSON strings for the records whose score lies in
        ``[min_score, max_score]``, with ``ASSISTANT_HEADER`` removed from
        their "instruction" text. ``scores`` maps "1".."10" to the number of
        records whose score, truncated to an int, equals that key.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks "score" (or "instruction" when selected).
    """
    selected = []
    scores = {str(bucket): 0 for bucket in range(1, 11)}
    for line in lines:
        if not line.strip():
            # Tolerate empty lines (e.g. a trailing newline at end of file).
            continue
        record = json.loads(line)
        score = record["score"]
        if min_score <= score <= max_score:
            record["instruction"] = record["instruction"].replace(
                ASSISTANT_HEADER, ""
            )
            # Serialize the cleaned record rather than echoing the raw input
            # line, otherwise the replacement above would be thrown away.
            # Appending "\n" explicitly also keeps records separated when the
            # shuffled input's last line had no trailing newline.
            selected.append(json.dumps(record, ensure_ascii=False) + "\n")
        if 1 <= score <= 10:
            scores[str(int(score))] += 1
    return selected, scores


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Shuffle the input records, write the selected ones, print the tally.

    Args:
        input_path: JSON-lines file to read.
        output_path: File that receives the selected records (overwritten).
    """
    with open(input_path, "r", encoding="utf-8") as src:
        lines = src.readlines()
    random.shuffle(lines)

    # tqdm only wraps the iterable; the helper drives the progress bar.
    selected, scores = select_records(tqdm(lines))

    with open(output_path, "w", encoding="utf-8") as dst:
        dst.writelines(selected)
    print(scores)


if __name__ == "__main__":
    main()