Spaces: Runtime error

jasonshaoshun committed · Commit 5dd7582
Parent(s): 53e4364

debug

Files changed:
- app.py (+16 −14)
- src/display/utils.py (+12 −23)

(Deleted lines that the diff view did not render are shown as "…".)
app.py CHANGED

@@ -19,14 +19,15 @@ from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    BENCHMARK_COLS_MULTIMODAL,
-   …
+   BENCHMARK_COLS_MIB_SUBGRAPH,
    COLS,
    COLS_MIB,
    COLS_MULTIMODAL,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
-   …
+   AutoEvalColumn_mib_subgraph,
+   AutoEvalColumn_mib_causalgraph,
    fields,
)
from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, REPO_ID, TOKEN, RESULTS_REPO_MIB_SUBGRAPH, EVAL_RESULTS_MIB_SUBGRAPH_PATH, RESULTS_REPO_MIB_CAUSALGRAPH, EVAL_RESULTS_MIB_CAUSALGRAPH_PATH

@@ -69,8 +70,9 @@ except Exception:



-LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB,
-…
+LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_SUBGRAPH)
+
+LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB_CAUASALGRAPH)

# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
# LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)

@@ -95,14 +97,14 @@ def init_leaderboard_mib_subgraph(dataframe, track):

    return Leaderboard(
        value=dataframe,
-       datatype=[c.type for c in fields(AutoEvalColumn_mib)],
+       datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
        select_columns=SelectColumns(
-           default_selection=[c.name for c in fields(AutoEvalColumn_mib) if c.displayed_by_default],
-           cant_deselect=[c.name for c in fields(AutoEvalColumn_mib) if c.never_hidden],
+           default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+           cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
            label="Select Columns to Display:",
        ),
-       search_columns=["Method"], # Changed from …
-       hide_columns=[c.name for c in fields(AutoEvalColumn_mib) if c.hidden],
+       search_columns=["Method"], # Changed from AutoEvalColumn_mib_subgraph.model.name to "Method"
+       hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )

@@ -120,14 +122,14 @@ def init_leaderboard_mib_causalgraph(dataframe, track):

    return Leaderboard(
        value=dataframe,
-       datatype=[c.type for c in fields(AutoEvalColumn_mib)],
+       datatype=[c.type for c in fields(AutoEvalColumn_mib_causalgraph)],
        select_columns=SelectColumns(
-           default_selection=[c.name for c in fields(AutoEvalColumn_mib) if c.displayed_by_default],
-           cant_deselect=[c.name for c in fields(AutoEvalColumn_mib) if c.never_hidden],
+           default_selection=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.displayed_by_default],
+           cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.never_hidden],
            label="Select Columns to Display:",
        ),
-       search_columns=["Method"], # Changed from …
-       hide_columns=[c.name for c in fields(AutoEvalColumn_mib) if c.hidden],
+       search_columns=["Method"], # Changed from AutoEvalColumn_mib_causalgraph.model.name to "Method"
+       hide_columns=[c.name for c in fields(AutoEvalColumn_mib_causalgraph) if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
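Both init functions configure gradio_leaderboard's Leaderboard entirely by introspecting the column dataclass: datatype, default_selection, cant_deselect, and hide_columns are all derived from the ColumnContent defaults. Below is a minimal, self-contained sketch of that pattern. The ColumnContent layout and the fields() helper are assumptions based on the stock Hugging Face leaderboard template (note that app.py imports fields from src.display.utils, not dataclasses), and the three column entries are illustrative, not this Space's exact sources.

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Template-style helper: return the ColumnContent defaults stored as
    # class attributes, so callers can read c.name, c.type, c.hidden directly.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

# Build the column dataclass dynamically, as src/display/utils.py does.
auto_eval_column_dict_mib_subgraph = [
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)],
    ["ioi_meta_llama", ColumnContent, ColumnContent("ioi_meta_llama", "number", True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
AutoEvalColumn_mib_subgraph = make_dataclass(
    "AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True
)

# What init_leaderboard_mib_subgraph would derive from it:
print([c.type for c in fields(AutoEvalColumn_mib_subgraph)])
# ['markdown', 'number', 'number']
print([c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default])
# ['Method', 'ioi_meta_llama', 'Average']
print([c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden])
# ['Method']

Because this fields() reads class attributes rather than dataclasses.fields, each entry is the ColumnContent instance itself, which is why the Leaderboard setup can filter on c.displayed_by_default, c.never_hidden, and c.hidden.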
src/display/utils.py CHANGED

@@ -21,7 +21,7 @@ class ColumnContent:
    never_hidden: bool = False

## Leaderboard columns
-…
+auto_eval_column_dict_mib_subgraph = []
auto_eval_column_dict = []
auto_eval_column_dict_multimodal = []


@@ -29,42 +29,40 @@ auto_eval_column_dict_multimodal = []



-…
+auto_eval_column_dict_mib_subgraph = []

# Method name column
-…
+auto_eval_column_dict_mib_subgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])

# For each task and model combination
for task in TasksMib_Subgraph:
    for model in task.value.models:
        col_name = f"{task.value.benchmark}_{model}" # ioi_meta_llama, mcqa_qwen, etc.
-       …
+       auto_eval_column_dict_mib_subgraph.append([
            col_name,
            ColumnContent,
            ColumnContent(col_name, "number", True)
        ])

# Average column
-…
+auto_eval_column_dict_mib_subgraph.append(["average", ColumnContent, ColumnContent("Average", "number", True)])


# Create the dataclass for MIB columns
-…
+AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)

# Column selection for display
-COLS_MIB = [c.name for c in fields(AutoEvalColumn_mib) if not c.hidden]
+COLS_MIB = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if not c.hidden]

-…
-…
+
+BENCHMARK_COLS_MIB_SUBGRAPH = []
for task in TasksMib_Subgraph:
    for model in task.value.models:
        col_name = f"{task.value.col_name}_{model.replace('-', '_')}"
-…
-…
-…
-…
-…
+       BENCHMARK_COLS_MIB_SUBGRAPH.append(col_name)

+# Implement the same for causal graph, auto_eval_column_dict_mib_causalgraph, AutoEvalColumn_mib_causalgraph
+BENCHMARK_COLS_MIB_CAUASALGRAPH = []




@@ -72,15 +70,6 @@ for task in TasksMib_Subgraph:



-# Init
-
-auto_eval_column_dict_mib.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-# auto_eval_column_dict_mib.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
-# auto_eval_column_dict_mib.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
-
-#Scores
-for task in TasksMib_Subgraph:
-    auto_eval_column_dict_mib.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])



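For reference, the benchmark-column generation that BENCHMARK_COLS_MIB_SUBGRAPH relies on can be sketched as below. The TaskMIB container and the enum members are assumptions inferred from the attribute accesses in the diff (task.value.benchmark, task.value.col_name, task.value.models) and from the ioi_meta_llama / mcqa_qwen examples in its comments; the real TasksMib_Subgraph definition is not shown in this commit.

from dataclasses import dataclass
from enum import Enum

@dataclass
class TaskMIB:
    benchmark: str   # raw benchmark id, e.g. "ioi"
    col_name: str    # prefix used for display/benchmark columns
    models: list     # model names evaluated on this task

class TasksMib_Subgraph(Enum):
    # Illustrative members only; the Space defines its own task/model lists.
    task0 = TaskMIB("ioi", "ioi", ["meta_llama", "qwen", "gpt2"])
    task1 = TaskMIB("mcqa", "mcqa", ["meta_llama", "qwen", "gpt2"])

BENCHMARK_COLS_MIB_SUBGRAPH = []
for task in TasksMib_Subgraph:
    for model in task.value.models:
        # '-' is normalised to '_' so each name is a valid dataframe column label
        BENCHMARK_COLS_MIB_SUBGRAPH.append(f"{task.value.col_name}_{model.replace('-', '_')}")

print(BENCHMARK_COLS_MIB_SUBGRAPH)
# ['ioi_meta_llama', 'ioi_qwen', 'ioi_gpt2', 'mcqa_meta_llama', 'mcqa_qwen', 'mcqa_gpt2']

One column per (task, model) pair keeps the leaderboard dataframe flat, so per-model scores and the Average column can be selected, hidden, and sorted with the same ColumnContent machinery as every other column.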