import json

from tqdm import tqdm
# Input is read one JSON object per line; kept lines are copied verbatim.
SOURCE_PATH = "/home/aiscuser/fhw/data/qwq_python_deduplicated.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_length.json"
# Records whose "instruction" has more whitespace-separated tokens are dropped.
MAX_INSTRUCTION_WORDS = 500


def instruction_word_count(line):
    """Return the number of whitespace-separated tokens in a record's instruction.

    Args:
        line: One line of the input file, holding a single JSON object with
            an "instruction" entry that supports ``.split()``.

    Returns:
        The token count of ``record["instruction"].split()``.

    Raises:
        json.JSONDecodeError: If ``line`` is not valid JSON.
        KeyError: If the object has no "instruction" key.
    """
    record = json.loads(line)
    return len(record["instruction"].split())


def is_within_limit(line, max_words=MAX_INSTRUCTION_WORDS):
    """Return True when the record's instruction has at most ``max_words`` tokens."""
    return instruction_word_count(line) <= max_words


def main():
    """Copy every record with a short enough instruction to OUTPUT_PATH.

    Lines are written back unchanged (not re-serialized), so the output keeps
    the exact formatting of the input for the records that pass the filter.
    """
    # Context managers guarantee both files are flushed and closed, even if a
    # malformed record raises part-way through. The input is opened first so a
    # missing source file does not leave a truncated output behind.
    with open(SOURCE_PATH, "r", encoding="utf-8") as src, \
            open(OUTPUT_PATH, "w", encoding="utf-8") as dst:
        # Materialized on purpose: tqdm needs a sized iterable to show a total.
        lines = src.readlines()
        for line in tqdm(lines):
            # A blank line (e.g. a trailing newline at end of file) is not a
            # record; skip it instead of crashing in json.loads.
            if not line.strip():
                continue
            if is_within_limit(line):
                dst.write(line)


if __name__ == "__main__":
    main()