# codescripts / extractscore.py
# f541119578's picture
# Upload folder using huggingface_hub
# fdf190d verified
import re
import json
from tqdm import tqdm
import os
# Default locations used when the script is run directly.
INPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_selected.json"

# Explicit score formats, tried in this order. The first format that matches
# anywhere in the judgement wins, and the LAST occurrence of that format is
# taken as the score.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",
        r"\[\[(\d*\.\d+|\d+)\]\]",
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",
    )
)

# Any bare integer or decimal ("7", "7.5", ".5"); used by the fallbacks.
_ANY_NUMBER = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Return the numeric score found in *judgement*, or None if there is none.

    Lookup order:
      1. The explicit formats in ``_SCORE_PATTERNS`` (last occurrence of the
         first format that matches).
      2. The first number that follows the last literal "Score" in the text.
      3. The first number anywhere in the text.

    Args:
        judgement: Free-form text of a quality judgement.

    Returns:
        The score as a float, or None when the text contains no number.
    """
    for pattern in _SCORE_PATTERNS:
        found = pattern.findall(judgement)
        if found:
            return float(found[-1])

    # The previous implementation used r"Score(.*?)" here. A lazy group at the
    # very end of a pattern always captures "", so this fallback never yielded
    # a number and, by overwriting the judgement with "", also disabled the
    # final fallback below. rpartition gives the text after the last "Score";
    # the first number in it is used so that "Score: 7/10" yields 7, not 10.
    _, marker, tail = judgement.rpartition("Score")
    if marker:
        numbers = _ANY_NUMBER.findall(tail)
        if numbers:
            return float(numbers[0])

    # Last resort: the first number that appears anywhere in the judgement.
    numbers = _ANY_NUMBER.findall(judgement)
    if numbers:
        return float(numbers[0])
    return None


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Copy every record that has an extractable score to *output_path*.

    Each non-blank line of *input_path* is parsed as a JSON object. The score
    extracted from its "quality_judgement" value is stored under "score" and
    the object is written as one JSON line. Records without any extractable
    number are skipped.

    An earlier variant (previously kept here as dead code) gathered its input
    lines from every file in ``processed_data`` whose name contained
    "llama_python_scored" instead of reading a single input file.

    Args:
        input_path: JSON-lines file to read.
        output_path: JSON-lines file to create or overwrite.
    """
    # Read everything up front so tqdm knows the total and can show progress.
    with open(input_path, "r", encoding="utf-8") as src:
        lines = src.readlines()

    # The context manager guarantees the output is flushed and closed.
    with open(output_path, "w", encoding="utf-8") as dst:
        for line in tqdm(lines):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            score = extract_score(record["quality_judgement"])
            if score is None:
                continue
            record["score"] = score
            dst.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    main()