|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
from sys import exit as sysexit |
|
from pathlib import Path |
|
import sys |
|
|
|
def list_files(directory, ext):
    """Index the ``HumanEval_<n>_...`` files of *directory* by their number.

    Args:
        directory: ``pathlib.Path`` of the directory to scan (not recursive).
        ext: File-name suffix to match, e.g. ``".py"``.

    Returns:
        A list in which position ``n`` holds the path of the file numbered
        ``n``, or ``None`` when no file carries that number.  The list is
        empty when nothing in the directory matches, so callers can test it
        for emptiness instead of hitting an ``IndexError``.

    Raises:
        ValueError: If the text between the first and second underscore of a
            matching file name is not an integer.
    """

    def problem_number(path):
        # The number is the second "_"-separated field of the file name.
        return int(path.name.split("_")[1])

    numbered = [
        (problem_number(path), path)
        for path in directory.glob(f"HumanEval_*{ext}")
    ]
    if not numbered:
        return []

    # Size the table by the highest number so that gaps stay as None.
    files_array = [None] * (max(number for number, _ in numbered) + 1)
    for number, path in numbered:
        files_array[number] = path
    return files_array
|
|
|
def main(eval_script, language, extension):
    """Evaluate benchmark files from the command line and record the results.

    Parses ``--directory`` (required) and ``--files`` (optional benchmark
    numbers) from the command line, runs ``eval_script`` on each selected
    file, and writes one ``language,file-stem,status`` line per file both to
    standard output and to ``../results/<language lowercased>.csv`` relative
    to this module's directory.  A summary line is printed at the end.

    Args:
        eval_script: Callable that takes a file path and returns a mapping
            with a ``'status'`` entry.
        language: Language label written to each result line; its lowercase
            form names the CSV file.
        extension: File-name suffix of the benchmark files, e.g. ``".py"``.

    Exits with status 1 when the directory has no files of the given type.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--directory", type=str, required=True, help="Directory to read benchmarks from"
    )
    parser.add_argument(
        "--files",
        type=int,
        nargs="*",
        default=[],
        help="Specify the benchmarks to evaluate by their number, e.g. --files 0 1 2"
    )
    args = parser.parse_args()

    directory = Path(args.directory).resolve()
    files_sorted = list_files(directory, extension)

    if not files_sorted:
        print(f'The specified directory does not contain files of type {extension}')
        sysexit(1)

    # Without an explicit selection, walk every slot of the file table.
    files_index = args.files if args.files else range(len(files_sorted))

    total = 0
    passed = 0
    syntax_error = 0

    results_file = Path(Path(__file__).parent, "..", "results", language.lower() + ".csv").resolve()

    with open(results_file, "w") as f:
        for i in files_index:
            # Treat out-of-range numbers as missing files: a number past the
            # end would raise IndexError, and a negative one would silently
            # wrap around to a file counted from the end of the table.
            filepath = files_sorted[i] if 0 <= i < len(files_sorted) else None
            if filepath is None:
                print("File {} does not exist!".format(i))
                continue

            res = eval_script(filepath)
            status = res['status']
            output = f"{language},{filepath.stem},{status}\n"
            f.write(output)
            print(output, end="")

            total += 1
            if status == "OK":
                passed += 1
            elif status == "SyntaxError":
                syntax_error += 1

    print(f"Total {total}, Syntax Error {syntax_error}, Passed {passed}")
|
|
|
|
|
|
|
def main_check_stubs(check_script, language, extension):
    """Check benchmark stub files from the command line and record the results.

    Parses ``--directory`` (required) and ``--files`` (optional benchmark
    numbers) from the command line, runs ``check_script`` on each selected
    file, and writes one ``language,file-stem,status`` line per file both to
    standard output and to ``../check_results/<language lowercased>.csv``
    relative to this module's directory.  A summary line is printed at the
    end.

    Args:
        check_script: Callable that takes a file path and returns a mapping
            with a ``'status'`` entry.
        language: Language label written to each result line; its lowercase
            form names the CSV file.
        extension: File-name suffix of the benchmark files, e.g. ``".py"``.

    Exits with status 1 when the directory has no files of the given type,
    or when at least one checked file did not report status ``"OK"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--directory", type=str, required=True, help="Directory to read benchmarks from"
    )
    parser.add_argument(
        "--files",
        type=int,
        nargs="*",
        default=[],
        help="Specify the benchmarks to evaluate by their number, e.g. --files 0 1 2"
    )
    args = parser.parse_args()

    directory = Path(args.directory).resolve()
    files_sorted = list_files(directory, extension)

    if not files_sorted:
        print(f'The specified directory does not contain files of type {extension}')
        sysexit(1)

    # Without an explicit selection, walk every slot of the file table.
    files_index = args.files if args.files else range(len(files_sorted))

    total = 0
    passed = 0

    results_file = Path(Path(__file__).parent, "..", "check_results", language.lower() + ".csv").resolve()

    with open(results_file, "w") as f:
        for i in files_index:
            # Treat out-of-range numbers as missing files: a number past the
            # end would raise IndexError, and a negative one would silently
            # wrap around to a file counted from the end of the table.
            filepath = files_sorted[i] if 0 <= i < len(files_sorted) else None
            if filepath is None:
                print("File {} does not exist!".format(i))
                continue

            res = check_script(filepath)
            status = res['status']
            output = f"{language},{filepath.stem},{status}\n"
            f.write(output)
            print(output, end="")

            total += 1
            if status == "OK":
                passed += 1

    print(f"Total {total}, Passed {passed}")

    # Signal failure to the calling process if any checked file was not OK.
    if total != passed:
        sysexit(1)
|
|