Commit 83fabc4 · Parent: e885ab4

add ranking

Files changed:
- src/display/about.py (+2 -2)
- src/populate.py (+28 -2)
src/display/about.py  CHANGED

@@ -10,7 +10,7 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
     task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
     task2 = Task("asr_eval3", "Bitrate", "Bitrate")

@@ -28,7 +28,7 @@ The leaderboard for discrete speech challenge (ASR Track) at Interspeech 2024. C
 
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
-The evaluation (static version) are conducted by the organizers only.
+The evaluation (static version) are conducted by the organizers only.
 
 We will accept submissions from the google form (see rules in the challenge website).
 
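For reference, a minimal, self-contained sketch of how the Tasks enum touched above ties JSON keys to leaderboard columns. The Task container is defined outside this hunk, so the three-field dataclass below and its field names are assumptions inferred only from how Task is constructed here.

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    task_key: str    # key of the task in the results JSON (assumed field name)
    metric_key: str  # key of the metric in the results JSON (assumed field name)
    col_name: str    # column name displayed on the leaderboard (assumed field name)


class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
    task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
    task2 = Task("asr_eval3", "Bitrate", "Bitrate")


# Iterating the enum yields the display columns that populate.py ranks below:
print([t.value.col_name for t in Tasks])  # ['EN_LibriSpeech CER', 'ML_SUPERB CER', 'Bitrate']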
src/populate.py  CHANGED

@@ -13,8 +13,34 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False))
+    # df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+
+
+    df[AutoEvalColumn.task0.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task0.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task1.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task1.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task2.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task2.name].values)
+        ).squeeze()
+    )
+
+    en_cer_rank = df[AutoEvalColumn.task0.name].rank(method="min", numeric_only=True, ascending=True)
+    ml_cer_rank = df[AutoEvalColumn.task1.name].rank(method="min", numeric_only=True, ascending=True)
+    bitrate_rank = df[AutoEvalColumn.task2.name].rank(method="min", numeric_only=True, ascending=True)
+    df["Ranking"] = pd.Series((en_cer_rank + ml_cer_rank + bitrate_rank)/3)
+    df = df.sort_values(by=["Ranking", AutoEvalColumn.task1.name], ascending=True)
+    df["Rank"] = df.groupby("Precision").cumcount() + 1
+    df.pop("Ranking")
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
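Taken on its own, the ranking logic this commit adds to get_leaderboard_df does three things: flatten each metric column from single-element arrays to scalars, average the per-metric min-ranks into an overall ranking used for sorting, and number rows within each Precision group. Below is a runnable sketch of that logic on a toy frame; the column names and values are made up for illustration and plain strings stand in for AutoEvalColumn.

import numpy as np
import pandas as pd

# Toy leaderboard data: raw results arrive as single-element arrays, hence the squeeze step.
df = pd.DataFrame(
    {
        "EN_LibriSpeech CER": [np.array([0.12]), np.array([0.08]), np.array([0.15])],
        "ML_SUPERB CER": [np.array([0.30]), np.array([0.25]), np.array([0.28])],
        "Bitrate": [np.array([450.0]), np.array([600.0]), np.array([380.0])],
        "Precision": ["float32", "float32", "float16"],
    }
)

# Flatten each metric column from arrays of shape (1,) to plain scalars.
for col in ["EN_LibriSpeech CER", "ML_SUPERB CER", "Bitrate"]:
    df[col] = pd.Series(np.stack(np.array(df[col].values)).squeeze())

# Rank every metric independently (lower is better), average the three ranks,
# sort by that average (ties broken by ML_SUPERB CER), then number rows within
# each Precision group.
en_cer_rank = df["EN_LibriSpeech CER"].rank(method="min", ascending=True)
ml_cer_rank = df["ML_SUPERB CER"].rank(method="min", ascending=True)
bitrate_rank = df["Bitrate"].rank(method="min", ascending=True)
df["Ranking"] = (en_cer_rank + ml_cer_rank + bitrate_rank) / 3
df = df.sort_values(by=["Ranking", "ML_SUPERB CER"], ascending=True)
df["Rank"] = df.groupby("Precision").cumcount() + 1
df.pop("Ranking")
print(df)

Using method="min" gives tied submissions the same (best possible) rank, and popping the intermediate "Ranking" column leaves only the per-precision "Rank" in the displayed table.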