File size: 331 Bytes
fdf190d |
1 2 3 4 5 6 7 8 9 10 11 |
import json
from tqdm import tqdm
# Filter a JSON-lines dataset by instruction length.
#
# Each non-blank input line is parsed as a JSON object, and the line is copied
# verbatim to the output file when its "instruction" value contains at most
# MAX_INSTRUCTION_WORDS whitespace-separated tokens. Longer records are dropped.
# NOTE(review): assumes every record has a string "instruction" field -- a
# missing key raises KeyError and malformed JSON raises json.JSONDecodeError.
MAX_INSTRUCTION_WORDS = 500

# The input is only read and the output only written, so plain "r"/"w" modes
# are enough. The context manager guarantees both files are flushed and closed
# even if a record fails to parse part-way through.
with open(
    "/home/aiscuser/fhw/data/qwq_python_deduplicated.json", "r", encoding="utf-8"
) as f, open(
    "/home/aiscuser/fhw/data/qwq_python_length.json", "w", encoding="utf-8"
) as fw:
    # Read all lines up front so tqdm receives a sized list and can display
    # the total count alongside the progress bar.
    lines = f.readlines()
    for line in tqdm(lines):
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # inside json.loads.
        if not line.strip():
            continue
        d = json.loads(line)
        # Length is measured in whitespace-separated tokens, not characters.
        length = len(d["instruction"].split())
        if length <= MAX_INSTRUCTION_WORDS:
            # Write the original text unchanged rather than re-serializing.
            fw.write(line)
|