Fix graphs
- app.py +5 -3
- src/display/utils.py +1 -0
- src/populate.py +3 -0
- style.css +6 -0
app.py
CHANGED
@@ -165,7 +165,9 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
 
     # Exclude 'always_here_cols' from 'columns' to avoid duplication
     columns = [c for c in columns if c not in always_here_cols]
-    new_columns = always_here_cols + [c for c in COLS if c in df.columns and c in columns]
+    new_columns = (
+        always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.row_id.name]
+    )
 
     # Remove duplicates while preserving order
     seen = set()
@@ -306,7 +308,7 @@ def toggle_all_categories(action: str) -> list[gr.CheckboxGroup]:
 
 
 def plot_size_vs_score(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
-    df2 = hidden_df.copy()
+    df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
     df2 = df2[df2["#Params (B)"] > 0]
     df2 = df2[["model_name_for_query", "#Params (B)", "AVG", "Few-shot"]]
     df2["AVG"] = df2["AVG"].astype(float)
@@ -333,7 +335,7 @@ TASK_AVG_NAME_MAP = {
 
 
 def plot_average_scores(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
-    df2 = hidden_df.copy()
+    df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
     df2 = df2[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
     df2 = df2.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
     df2 = df2.rename(columns=TASK_AVG_NAME_MAP)
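Note: both plot functions now select from hidden_df only the rows whose row_id also appears in the visible, filtered table df, so the graphs track the active table filters instead of always plotting the full leaderboard. A minimal sketch of the pattern, assuming pandas and an illustrative "ID"/"score" schema in place of the real column names:

    import pandas as pd

    # hidden_df: the full leaderboard; df: the user-filtered, visible view.
    # "ID" stands in for AutoEvalColumn.row_id.name.
    hidden_df = pd.DataFrame({"ID": [0, 1, 2, 3], "score": [0.2, 0.5, 0.7, 0.9]})
    df = hidden_df[hidden_df["score"] > 0.4]  # whatever filters the UI applied

    # Keep only the hidden rows whose IDs survived the filters; hidden_df may
    # carry columns the visible table dropped, which the plots still need.
    plot_df = hidden_df[hidden_df["ID"].isin(df["ID"])]
    print(plot_df)  # rows with IDs 1, 2, 3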
src/display/utils.py
CHANGED
|
@@ -63,6 +63,7 @@ auto_eval_column_dict.append(
|
|
| 63 |
)
|
| 64 |
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False, dummy=True)])
|
| 65 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
|
|
|
| 66 |
|
| 67 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 68 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
|
| 63 |
)
|
| 64 |
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False, dummy=True)])
|
| 65 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
| 66 |
+
auto_eval_column_dict.append(["row_id", ColumnContent, ColumnContent("ID", "number", False, dummy=True)])
|
| 67 |
|
| 68 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 69 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
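Note: AutoEvalColumn is generated at runtime, which is why adding a column only takes one appended entry, and why AutoEvalColumn.row_id.name in app.py resolves to the "ID" string supplied here. A minimal sketch of how make_dataclass exposes such defaults as class attributes, with plain strings standing in for the real ColumnContent instances:

    from dataclasses import make_dataclass

    # (field_name, type, default) triples, mirroring auto_eval_column_dict;
    # the defaults here are simplified stand-ins for ColumnContent objects.
    fields = [
        ("dummy", str, "model_name_for_query"),
        ("row_id", str, "ID"),
    ]
    AutoEvalColumn = make_dataclass("AutoEvalColumn", fields, frozen=True)

    print(AutoEvalColumn.row_id)  # "ID" -- the default is readable on the class itself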
src/populate.py
CHANGED
@@ -15,6 +15,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
 
     df = pd.DataFrame.from_records(all_data_json)
 
+    # Add a row ID column
+    df[AutoEvalColumn.row_id.name] = range(len(df))
+
     score_cols = [
         "ALT E to J BLEU",
         "ALT J to E BLEU",
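Note: the ID is assigned immediately after the frame is built, before any filtering or sorting, so each row keeps a stable identifier across every later view. A minimal sketch with made-up records:

    import pandas as pd

    records = [{"model": "model-a"}, {"model": "model-b"}, {"model": "model-c"}]
    df = pd.DataFrame.from_records(records)

    # Sequential IDs assigned once at load time; filtered views keep them.
    df["ID"] = range(len(df))
    filtered = df[df["model"] != "model-b"]
    print(list(filtered["ID"]))  # [0, 2] -- IDs are preserved, not renumbered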
style.css
CHANGED
@@ -135,3 +135,9 @@
     flex-direction: row;
     align-items: center;
 }
+
+/* Hides the final AutoEvalColumn */
+#llm-benchmark-tab-table table td:last-child,
+#llm-benchmark-tab-table table th:last-child {
+    display: none;
+}