File size: 331 Bytes
fdf190d
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import json
from tqdm import tqdm
# Filter a JSON Lines dataset: copy to the output file only those records
# whose "instruction" field contains at most MAX_INSTRUCTION_WORDS
# whitespace-separated tokens. Kept records are written back verbatim
# (the original line text, not a re-serialized object).
MAX_INSTRUCTION_WORDS = 500

# The input is opened read-only ("r" rather than "r+") so the script also
# works on read-only datasets; the output is never read back, so "w" is
# enough. The encoding is pinned to UTF-8 so the result does not depend on
# the machine's locale. The `with` block guarantees both handles are
# flushed and closed even if a record fails to parse.
with open(
    "/home/aiscuser/fhw/data/qwq_python_deduplicated.json", "r", encoding="utf-8"
) as f, open(
    "/home/aiscuser/fhw/data/qwq_python_length.json", "w", encoding="utf-8"
) as fw:
    # Read everything up front so tqdm knows the total and can show
    # percentage / ETA.
    lines = f.readlines()
    for line in tqdm(lines):
        # Skip blank lines (e.g. a trailing empty line) instead of
        # crashing inside json.loads.
        if not line.strip():
            continue
        d = json.loads(line)
        # Length is measured in whitespace-delimited tokens, not characters.
        length = len(d["instruction"].split())
        if length <= MAX_INSTRUCTION_WORDS:
            fw.write(line)