|
import re |
|
import json |
|
from tqdm import tqdm |
|
import os |
|
|
|
# Source file: one JSON object per line, each carrying a "quality_judgement"
# string produced by a grader.
INPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_final.json"
# Destination: the same records, each augmented with a numeric "score".
OUTPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_selected.json"

# NOTE: a disabled variant of this script built its input by concatenating
# every file under processed_data/ whose name contains "llama_python_scored".

# Strict score formats, tried in order (most specific first). Each pattern has
# exactly one capture group holding the number.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",               # [[7/10]]
        r"\[\[(\d*\.\d+|\d+)\]\]",                  # [[7]]
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",    # **Score: [7/10]**
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",       # **Score: [7]**
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",        # **Score: 7/10**
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",           # **Score: 7**
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",        # **Score:** 7/10
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",           # **Score:** 7
    )
)

# Any integer or decimal literal; used by the loose fallbacks.
_NUMBER_RE = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Return the score embedded in a judgement string, or None if absent.

    Resolution order:
      1. The strict formats in ``_SCORE_PATTERNS``; for the first format that
         matches, the last occurrence in the text wins.
      2. The first number that follows the last occurrence of "Score".
      3. The first number anywhere in the text.

    Args:
        judgement: free-form grader output (str).

    Returns:
        The score as a float, or None when the text contains no number.
    """
    for pattern in _SCORE_PATTERNS:
        found = pattern.findall(judgement)
        if found:
            return float(found[-1])

    # The previous implementation used r"Score(.*?)", whose trailing lazy
    # group always captures "", so this fallback never fired and it also
    # overwrote the judgement text, disabling the final fallback. Take the
    # text after the last "Score" explicitly instead. The first number is
    # used so that "Score: 7/10" yields 7, not the denominator.
    _, marker, tail = judgement.rpartition("Score")
    if marker:
        match = _NUMBER_RE.search(tail)
        if match:
            return float(match.group(0))

    # Last resort: the first number anywhere in the untouched judgement.
    match = _NUMBER_RE.search(judgement)
    if match:
        return float(match.group(0))
    return None


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Score every record of ``input_path`` and write the scored ones out.

    Each non-blank input line is parsed as JSON; its "quality_judgement" is
    run through ``extract_score``. Records with a score are written to
    ``output_path`` as JSON lines with an added "score" key; records without
    any recognisable number are skipped.
    """
    # Context managers guarantee both handles are flushed and closed even if
    # a malformed line raises midway through.
    with open(input_path, "r") as src, open(output_path, "w") as dst:
        # Read everything up front so the progress bar knows the total.
        lines = src.readlines()
        for line in tqdm(lines):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            d = json.loads(line)
            score = extract_score(d["quality_judgement"])
            if score is None:
                continue
            d["score"] = score
            dst.write(json.dumps(d) + "\n")


if __name__ == "__main__":
    main()
|
|
|
|
|
|