change bele task
- app.py +4 -0
- src/about.py +2 -2
- src/leaderboard/read_evals.py +4 -1
app.py
CHANGED
@@ -68,6 +68,10 @@ leaderboard_df = original_df.copy()
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def style_df(df: pd.DataFrame) -> Styler:
+    # new_df = df.copy(deep=True)
+    # new_df['polish_poleval2018_task3_test_10k'] = -new_df['polish_poleval2018_task3_test_10k']
+    # new_df = new_df.to_frame()
+
     leaderboard_df_styled = df.style.background_gradient(cmap="viridis")
     rounding = {'#Params (B)': "{:.1f}"}
     for task in Tasks:
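For context, style_df builds on pandas' Styler API. Below is a minimal standalone sketch of that pattern, with illustrative column names and data (the real app derives its columns from the Tasks enum and the leaderboard results):

import pandas as pd

# Minimal sketch of the Styler pattern style_df uses (hypothetical data;
# column names here are illustrative, not taken from the Space).
df = pd.DataFrame({
    "#Params (B)": [6.738, 1.102],
    "polemo2-in_mc": [0.71, 0.54],
})

styled = df.style.background_gradient(cmap="viridis")  # shade cells by value
styled = styled.format({"#Params (B)": "{:.1f}"})      # per-column rounding
html = styled.to_html()                                # HTML rendered in the UI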
src/about.py
CHANGED
@@ -13,14 +13,14 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
+    # task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
     task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until")
     task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice")
     task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until")
     task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice")
     task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice")
     task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until")
-
+    task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
     task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")
     task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice")
     task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until")
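This change disables the raw multilingual belebele_pol_Latn task and adds the Polish-specific polish_belebele_mc multiple-choice variant (task9a) next to the existing belebele_g generate-until variant. A sketch of the Task/Tasks pattern being edited; the field names are assumptions inferred from the in-file comment, not the actual definition in src/about.py:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str   # task_key in the results json
    metric: str      # metric_key in the results json, e.g. "acc,none"
    col_name: str    # name to display in the leaderboard
    task_type: str   # "multiple_choice" or "generate_until"

class Tasks(Enum):
    task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
    task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")

# Downstream code typically iterates the enum to build leaderboard columns:
for task in Tasks:
    print(task.value.col_name, "<-", task.value.benchmark, task.value.metric)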
src/leaderboard/read_evals.py
CHANGED
@@ -376,7 +376,10 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
     # print('missing_results_for_task', missing_results_for_task)
     for task, models in missing_results_for_task.items():
         print(f"Missing results for {task} for {len(models)} models")
-        print(" ".join(models))
+        # print(" ".join(models))
+        for model in models:
+            print(f'"{model}"')
+        print()
 
     print(f"Missing metadata for {len(missing_metadata)} models")
     for model in missing_metadata:
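The read_evals.py change replaces the space-separated dump of missing models with one quoted name per line, which pastes cleanly into a request list or shell loop. A self-contained sketch of the new reporting loop, using hypothetical data (the real mapping is built by get_raw_eval_results from the results directory):

missing_results_for_task = {
    "polish_belebele_mc": ["org/model-a", "org/model-b"],  # hypothetical entries
}

for task, models in missing_results_for_task.items():
    print(f"Missing results for {task} for {len(models)} models")
    for model in models:
        print(f'"{model}"')  # one quoted name per line for easy copy-paste
    print()                  # blank line between tasks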
|