chore: Reorder arguments in run_model_task function

Files changed:
- .gitignore +2 -0
- calculate_memory_usage.py +10 -7
- code_efficiency_calculator.py +15 -19
- leaderboard.py +1 -2
- requirements.txt +3 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
+results/
+
 # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos,windows
 # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,macos,windows
 
calculate_memory_usage.py CHANGED
@@ -1,7 +1,5 @@
-import json
 import os
 import glob
-import numpy as np
 import argparse
 from code_efficiency_calculator import run_model_task
 
@@ -52,8 +50,8 @@ def report_max_memory_usage(dat_file_path):
         max_memory_usage = max(max_memory_usage, mem_in_mb)
     return max_memory_usage
 
-def report_results(task,model):
-    run_model_task(task,model)
+def report_results(task, model, file):
+    run_model_task(task, model, file)
     dat_directory = f"./results/{task}_{model}"
     canonical_solution_directory = f"./results/{task}_canonical_solution"
     canonical_solution_memory_usage = {}
@@ -209,12 +207,17 @@ def report_results(task,model):
     total_500_nmu = total_500_nmu/len(normalized_execution_time_list)*100
     total_500_tmu = total_500_tmu/len(normalized_execution_time_list)*100
 
-
+    return f"{model}&{total_execution_time:.2f}&{normalized_execution_time:.2f}&{max_net:.2f}&{total_500_net:.1f}&{total_max_memory_usage:.2f}&{normalized_max_memory_usage:.2f}&{max_nmu:.2f}&{total_500_nmu:.1f}&{total_memory_usage:.2f}&{normalized_memory_usage:.2f}&{max_tmu:.2f}&{total_500_tmu:.1f}&{pass1:.1f}\\\\"
+
 
 if __name__ == "__main__":
     parse = argparse.ArgumentParser()
     parse.add_argument("--task", type=str, default="EffiBench")
     parse.add_argument("--model", type=str, default="gpt-4")
-
+    parse.add_argument("--file", type=str, default="")
     args = parse.parse_args()
-
+
+    if not args.file:
+        args.file = f"./{args.task}_{args.model}.json"
+
+    report_results(args.task,args.model, args.file)
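Note: report_results now returns the formatted results row instead of ending at the computation. A minimal usage sketch of the new three-argument call (the JSON path is illustrative, assuming the default ./<task>_<model>.json naming from the diff):

# Hedged sketch: assumes calculate_memory_usage.py is importable and that
# ./EffiBench_gpt-4.json exists with the expected completions.
from calculate_memory_usage import report_results

# Equivalent to: python calculate_memory_usage.py --task EffiBench --model gpt-4
row = report_results("EffiBench", "gpt-4", "./EffiBench_gpt-4.json")
print(row)  # "&"-separated LaTeX table row terminated with "\\\\"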
code_efficiency_calculator.py CHANGED
@@ -11,22 +11,12 @@ import os
 import re
 import shutil
 import contextlib
-import
-import json
-from typing import Optional, Callable, Dict
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import inspect
-import numpy as np
-import sys
+from concurrent.futures import ThreadPoolExecutor
 import concurrent.futures
-import time
 from tqdm import tqdm
 import contextlib
-import faulthandler
 import io
 import os
-import multiprocessing
-import platform
 import signal
 from tqdm import tqdm
 
@@ -618,13 +608,14 @@ def fetch_completion(dataset,model):
     return dataset
 
 
-def run_model_task(task,model):
+def run_model_task(task, model, file):
 
     if "/" in model:
         model = model.split("/")[1]
     dat_path = f"./results/{task}_{model}"
     canonical_solution_path = f"./results/{task}_canonical_solution"
-
+
+    with open(file, "r") as f:
         dataset = json.load(f)
 
     if os.path.exists(dat_path):
@@ -639,7 +630,7 @@ def run_model_task(task,model):
 
     fetch_completion(dataset,dat_path)
 
-    with open(
+    with open(file, "r") as f:
         dataset = json.load(f)
     for i in range(len(dataset)):
         dataset[i]["dataset"] = f"{task}"
@@ -647,9 +638,14 @@ def run_model_task(task,model):
 
 
 if __name__ == "__main__":
-
-
-
-
+    parse = argparse.ArgumentParser()
+    parse.add_argument("--task", type=str, default="EffiBench")
+    parse.add_argument("--model", type=str, default="gpt-4")
+    parse.add_argument("--file", type=str, default="")
+    args = parse.parse_args()
+
+    if not args.file:
+        args.file = f"./{args.task}_{args.model}.json"
+
+    run_model_task(args.task, args.model, args.file)
 
-    run_model_task(args.model,args.task)
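Note: the reorder is a breaking change for external callers — the old call site passed (model, task), the new signature is (task, model, file). A defensive sketch of the new call order (the wrapper name is hypothetical; the default-file fallback mirrors the __main__ block above):

from code_efficiency_calculator import run_model_task

# Hypothetical convenience wrapper around the new (task, model, file) order.
def run_with_default_file(task: str, model: str, file: str = "") -> None:
    # Fall back to the ./<task>_<model>.json convention used in __main__.
    if not file:
        file = f"./{task}_{model}.json"
    run_model_task(task, model, file)

run_with_default_file("EffiBench", "gpt-4")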
leaderboard.py CHANGED
@@ -63,8 +63,7 @@ def process_uploaded_file(file):
     except Exception as e:
         return f"Error parsing the task and model name from the file name: {str(e)}! Should be in the format of <task>_<model>.json"
 
-
-
+    return report_results(task, model, file)
 
 def build_leaderboard_tab(leaderboard_table_file):
     gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
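Note: process_uploaded_file now passes the parsed task/model plus the uploaded file path through report_results and returns the resulting row to the Gradio UI rather than discarding it. A standalone sketch of the file-name parsing implied by the error message (assuming the first underscore splits task from model):

import os

# Hypothetical helper mirroring the "<task>_<model>.json" convention.
def parse_task_and_model(path: str) -> tuple[str, str]:
    stem = os.path.basename(path)
    if stem.endswith(".json"):
        stem = stem[: -len(".json")]
    task, _, model = stem.partition("_")
    if not task or not model:
        raise ValueError("Should be in the format of <task>_<model>.json")
    return task, model

print(parse_task_and_model("./EffiBench_gpt-4.json"))  # ('EffiBench', 'gpt-4')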
requirements.txt CHANGED
@@ -1,3 +1,6 @@
+tqdm
+numpy
+gradio
 plotly
 line_profiler
 memory_profiler