Spaces:
Running
Running
Konstantin Chernyshev
committed on
Commit
·
c6356a2
1
Parent(s):
148c1e7
feat: auto convert values to percentage
Browse files
- app.py +2 -3
- src/populate.py +83 -55
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
from typing import Any
|
3 |
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
@@ -153,13 +152,13 @@ def init_leaderboard(dataframe: pd.DataFrame, columns_dict: dict[str, Field]) ->
|
|
153 |
# create the hidden and visible dataframes to display
|
154 |
hidden_leaderboard_df = gr.components.Dataframe(
|
155 |
value=dataframe,
|
156 |
-
datatype=[c.
|
157 |
visible=False,
|
158 |
interactive=False,
|
159 |
)
|
160 |
leaderboard_df = gr.components.Dataframe(
|
161 |
value=dataframe[[c.pretty_name for c in columns_dict.values() if c.displayed_by_default]],
|
162 |
-
datatype=[c.
|
163 |
elem_id="leaderboard-df",
|
164 |
interactive=False,
|
165 |
)
|
|
|
1 |
import os
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
|
|
152 |
# create the hidden and visible dataframes to display
|
153 |
hidden_leaderboard_df = gr.components.Dataframe(
|
154 |
value=dataframe,
|
155 |
+
datatype=[c.gradio_column_type for c in columns_dict.values()],
|
156 |
visible=False,
|
157 |
interactive=False,
|
158 |
)
|
159 |
leaderboard_df = gr.components.Dataframe(
|
160 |
value=dataframe[[c.pretty_name for c in columns_dict.values() if c.displayed_by_default]],
|
161 |
+
datatype=[c.gradio_column_type for c in columns_dict.values()],
|
162 |
elem_id="leaderboard-df",
|
163 |
interactive=False,
|
164 |
)
|
src/populate.py
CHANGED
@@ -8,6 +8,7 @@ from transformers import AutoConfig
|
|
8 |
|
9 |
|
10 |
UNKNOWN_MODEL_SHOW_SIZE = 150
|
|
|
11 |
|
12 |
|
13 |
def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
|
@@ -137,12 +138,18 @@ def get_hf_data_by_model_name(model_name: str) -> dict:
|
|
137 |
@dataclass
|
138 |
class Field:
|
139 |
pretty_name: str
|
140 |
-
column_type: str
|
141 |
displayed_by_default: bool = True
|
142 |
never_hidden: bool = False
|
143 |
fully_hidden: bool = False
|
144 |
tags: list[str] = field(default_factory=list)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
MODEL_COLUMNS_DICT = {
|
148 |
"model_type_symbol": Field("T", "str", never_hidden=True),
|
@@ -155,44 +162,45 @@ MODEL_COLUMNS_DICT = {
|
|
155 |
"model_architecture": Field("Architecture", "str", displayed_by_default=False),
|
156 |
"model_license": Field("License", "markdown", displayed_by_default=False),
|
157 |
"model_family": Field("Family", "str", displayed_by_default=False),
|
|
|
158 |
}
|
159 |
|
160 |
U_MATH_COLUMNS_DICT = {
|
161 |
"rank": Field("Rank", "number", never_hidden=True),
|
162 |
**MODEL_COLUMNS_DICT,
|
163 |
"judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
|
164 |
-
"u_math_acc": Field("U-MATH Acc", "
|
165 |
-
"u_math_text_acc": Field("U-MATH Text Acc", "
|
166 |
-
"u_math_visual_acc": Field("U-MATH Visual Acc", "
|
167 |
-
"differential_calc_acc": Field("Diff Calc Acc", "
|
168 |
-
"differential_calc_text_acc": Field("Diff Calc Text Acc", "
|
169 |
"differential_calc_visual_acc": Field(
|
170 |
-
"Diff Calc Visual Acc", "
|
171 |
),
|
172 |
-
"integral_calc_acc": Field("Integral Calc Acc", "
|
173 |
-
"integral_calc_text_acc": Field("Integral Calc Text Acc", "
|
174 |
"integral_calc_visual_acc": Field(
|
175 |
-
"Integral Calc Visual Acc", "
|
176 |
),
|
177 |
-
"algebra_acc": Field("Algebra Acc", "
|
178 |
-
"algebra_text_acc": Field("Algebra Text Acc", "
|
179 |
-
"algebra_visual_acc": Field("Algebra Visual Acc", "
|
180 |
-
"multivariable_calculus_acc": Field("Multivar Calc Acc", "
|
181 |
"multivariable_calculus_text_acc": Field(
|
182 |
-
"Multivar Calc Text Acc", "
|
183 |
),
|
184 |
"multivariable_calculus_visual_acc": Field(
|
185 |
-
"Multivar Calc Visual Acc", "
|
186 |
),
|
187 |
-
"precalculus_review_acc": Field("Precalc Acc", "
|
188 |
-
"precalculus_review_text_acc": Field("Precalc Text Acc", "
|
189 |
"precalculus_review_visual_acc": Field(
|
190 |
-
"Precalc Visual Acc", "
|
191 |
),
|
192 |
-
"sequences_series_acc": Field("Seq & Series Acc", "
|
193 |
-
"sequences_series_text_acc": Field("Seq & Series Text Acc", "
|
194 |
"sequences_series_visual_acc": Field(
|
195 |
-
"Seq & Series Visual Acc", "
|
196 |
),
|
197 |
}
|
198 |
|
@@ -200,46 +208,46 @@ MU_MATH_COLUMNS_DICT = {
|
|
200 |
"rank": Field("Rank", "number", never_hidden=True),
|
201 |
**MODEL_COLUMNS_DICT,
|
202 |
"extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
|
203 |
-
"mu_math_f1": Field("μ-MATH F1", "
|
204 |
-
"mu_math_tpr": Field("μ-MATH TPR", "
|
205 |
-
"mu_math_tnr": Field("μ-MATH TNR", "
|
206 |
-
"mu_math_ppv": Field("μ-MATH PPV", "
|
207 |
-
"mu_math_npv": Field("μ-MATH NPV", "
|
208 |
-
"GPT-4o_f1": Field("GPT-4o Subset F1", "
|
209 |
-
"GPT-4o_tpr": Field("GPT-4o Subset TPR", "
|
210 |
-
"GPT-4o_tnr": Field("GPT-4o Subset TNR", "
|
211 |
-
"GPT-4o_ppv": Field("GPT-4o Subset PPV", "
|
212 |
-
"GPT-4o_npv": Field("GPT-4o Subset NPV", "
|
213 |
-
"Gemini-1.5-Pro_f1": Field("Gemini-1.5-Pro Subset F1", "
|
214 |
-
"Gemini-1.5-Pro_tpr": Field("Gemini-1.5-Pro Subset TPR", "
|
215 |
-
"Gemini-1.5-Pro_tnr": Field("Gemini-1.5-Pro Subset TNR", "
|
216 |
-
"Gemini-1.5-Pro_ppv": Field("Gemini-1.5-Pro Subset PPV", "
|
217 |
-
"Gemini-1.5-Pro_npv": Field("Gemini-1.5-Pro Subset NPV", "
|
218 |
-
"Llama-3.1-70B-Instruct_f1": Field("Llama-3.1-70B Subset F1", "
|
219 |
-
"Llama-3.1-70B-Instruct_tpr": Field("Llama-3.1-70B Subset TPR", "
|
220 |
-
"Llama-3.1-70B-Instruct_tnr": Field("Llama-3.1-70B Subset TNR", "
|
221 |
-
"Llama-3.1-70B-Instruct_ppv": Field("Llama-3.1-70B Subset PPV", "
|
222 |
-
"Llama-3.1-70B-Instruct_npv": Field("Llama-3.1-70B Subset NPV", "
|
223 |
-
"Qwen2.5-72B-Instruct_f1": Field("Qwen2.5-72B Subset F1", "
|
224 |
-
"Qwen2.5-72B-Instruct_tpr": Field("Qwen2.5-72B Subset TPR", "
|
225 |
-
"Qwen2.5-72B-Instruct_tnr": Field("Qwen2.5-72B Subset TNR", "
|
226 |
-
"Qwen2.5-72B-Instruct_ppv": Field("Qwen2.5-72B Subset PPV", "
|
227 |
-
"Qwen2.5-72B-Instruct_npv": Field("Qwen2.5-72B Subset NPV", "
|
228 |
}
|
229 |
U_MATH_AND_MU_MATH_COLUMNS_DICT = {
|
230 |
"u_math_rank": Field("U-MATH Rank", "number", never_hidden=True),
|
231 |
"mu_math_rank": Field("μ-MATH Rank", "number", never_hidden=True),
|
232 |
**MODEL_COLUMNS_DICT,
|
233 |
-
"u_math_acc": Field("U-MATH Acc", "
|
234 |
-
"u_math_text_acc": Field("U-MATH Text Acc", "
|
235 |
-
"u_math_visual_acc": Field("U-MATH Visual Acc", "
|
236 |
"judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
|
237 |
"extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
|
238 |
-
"mu_math_f1": Field("μ-MATH F1", "
|
239 |
-
"mu_math_tpr": Field("μ-MATH TPR", "
|
240 |
-
"mu_math_tnr": Field("μ-MATH TNR", "
|
241 |
-
"mu_math_ppv": Field("μ-MATH PPV", "
|
242 |
-
"mu_math_npv": Field("μ-MATH NPV", "
|
243 |
}
|
244 |
|
245 |
|
@@ -306,6 +314,16 @@ def get_u_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = Tr
|
|
306 |
df_meta = get_model_meta_info_df(df["full_model_name"].unique())
|
307 |
df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
|
308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
# convert to pretty names and sort columns by order in dict
|
310 |
if use_pretty_names:
|
311 |
df = df[U_MATH_COLUMNS_DICT.keys()]
|
@@ -351,6 +369,16 @@ def get_mu_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = T
|
|
351 |
df_meta = get_model_meta_info_df(df["full_model_name"].unique())
|
352 |
df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
|
353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
# convert to pretty names and sort columns by order in dict
|
355 |
if use_pretty_names:
|
356 |
df = df[MU_MATH_COLUMNS_DICT.keys()]
|
|
|
8 |
|
9 |
|
10 |
UNKNOWN_MODEL_SHOW_SIZE = 150
|
11 |
+
PERCENT_ROUND_DIGITS = 1
|
12 |
|
13 |
|
14 |
def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
|
|
|
138 |
@dataclass
|
139 |
class Field:
|
140 |
pretty_name: str
|
141 |
+
column_type: str # rate (auto-convert to percent number), number, str, markdown
|
142 |
displayed_by_default: bool = True
|
143 |
never_hidden: bool = False
|
144 |
fully_hidden: bool = False
|
145 |
tags: list[str] = field(default_factory=list)
|
146 |
|
147 |
+
@property
|
148 |
+
def gradio_column_type(self) -> str:
|
149 |
+
if self.column_type == "rate":
|
150 |
+
return "number"
|
151 |
+
return self.column_type
|
152 |
+
|
153 |
|
154 |
MODEL_COLUMNS_DICT = {
|
155 |
"model_type_symbol": Field("T", "str", never_hidden=True),
|
|
|
162 |
"model_architecture": Field("Architecture", "str", displayed_by_default=False),
|
163 |
"model_license": Field("License", "markdown", displayed_by_default=False),
|
164 |
"model_family": Field("Family", "str", displayed_by_default=False),
|
165 |
+
"model_url": Field("Model URL", "str", fully_hidden=True, displayed_by_default=False),
|
166 |
}
|
167 |
|
168 |
U_MATH_COLUMNS_DICT = {
|
169 |
"rank": Field("Rank", "number", never_hidden=True),
|
170 |
**MODEL_COLUMNS_DICT,
|
171 |
"judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
|
172 |
+
"u_math_acc": Field("U-MATH Acc", "rate", never_hidden=True, tags=["u_math"]),
|
173 |
+
"u_math_text_acc": Field("U-MATH Text Acc", "rate", tags=["u_math", "text"]),
|
174 |
+
"u_math_visual_acc": Field("U-MATH Visual Acc", "rate", tags=["u_math", "visual"]),
|
175 |
+
"differential_calc_acc": Field("Diff Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
176 |
+
"differential_calc_text_acc": Field("Diff Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
|
177 |
"differential_calc_visual_acc": Field(
|
178 |
+
"Diff Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
|
179 |
),
|
180 |
+
"integral_calc_acc": Field("Integral Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
181 |
+
"integral_calc_text_acc": Field("Integral Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
|
182 |
"integral_calc_visual_acc": Field(
|
183 |
+
"Integral Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
|
184 |
),
|
185 |
+
"algebra_acc": Field("Algebra Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
186 |
+
"algebra_text_acc": Field("Algebra Text Acc", "rate", displayed_by_default=False, tags=["text"]),
|
187 |
+
"algebra_visual_acc": Field("Algebra Visual Acc", "rate", displayed_by_default=False, tags=["visual"]),
|
188 |
+
"multivariable_calculus_acc": Field("Multivar Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
189 |
"multivariable_calculus_text_acc": Field(
|
190 |
+
"Multivar Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]
|
191 |
),
|
192 |
"multivariable_calculus_visual_acc": Field(
|
193 |
+
"Multivar Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
|
194 |
),
|
195 |
+
"precalculus_review_acc": Field("Precalc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
196 |
+
"precalculus_review_text_acc": Field("Precalc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
|
197 |
"precalculus_review_visual_acc": Field(
|
198 |
+
"Precalc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
|
199 |
),
|
200 |
+
"sequences_series_acc": Field("Seq & Series Acc", "rate", displayed_by_default=False, tags=["subjects"]),
|
201 |
+
"sequences_series_text_acc": Field("Seq & Series Text Acc", "rate", displayed_by_default=False, tags=["text"]),
|
202 |
"sequences_series_visual_acc": Field(
|
203 |
+
"Seq & Series Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
|
204 |
),
|
205 |
}
|
206 |
|
|
|
208 |
"rank": Field("Rank", "number", never_hidden=True),
|
209 |
**MODEL_COLUMNS_DICT,
|
210 |
"extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
|
211 |
+
"mu_math_f1": Field("μ-MATH F1", "rate", never_hidden=True, tags=["mu_math", "splits"]),
|
212 |
+
"mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["mu_math"]),
|
213 |
+
"mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["mu_math"]),
|
214 |
+
"mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["mu_math"]),
|
215 |
+
"mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["mu_math"]),
|
216 |
+
"GPT-4o_f1": Field("GPT-4o Subset F1", "rate", tags=["splits"]),
|
217 |
+
"GPT-4o_tpr": Field("GPT-4o Subset TPR", "rate", displayed_by_default=False),
|
218 |
+
"GPT-4o_tnr": Field("GPT-4o Subset TNR", "rate", displayed_by_default=False),
|
219 |
+
"GPT-4o_ppv": Field("GPT-4o Subset PPV", "rate", displayed_by_default=False),
|
220 |
+
"GPT-4o_npv": Field("GPT-4o Subset NPV", "rate", displayed_by_default=False),
|
221 |
+
"Gemini-1.5-Pro_f1": Field("Gemini-1.5-Pro Subset F1", "rate", tags=["splits"]),
|
222 |
+
"Gemini-1.5-Pro_tpr": Field("Gemini-1.5-Pro Subset TPR", "rate", displayed_by_default=False),
|
223 |
+
"Gemini-1.5-Pro_tnr": Field("Gemini-1.5-Pro Subset TNR", "rate", displayed_by_default=False),
|
224 |
+
"Gemini-1.5-Pro_ppv": Field("Gemini-1.5-Pro Subset PPV", "rate", displayed_by_default=False),
|
225 |
+
"Gemini-1.5-Pro_npv": Field("Gemini-1.5-Pro Subset NPV", "rate", displayed_by_default=False),
|
226 |
+
"Llama-3.1-70B-Instruct_f1": Field("Llama-3.1-70B Subset F1", "rate", tags=["splits"]),
|
227 |
+
"Llama-3.1-70B-Instruct_tpr": Field("Llama-3.1-70B Subset TPR", "rate", displayed_by_default=False),
|
228 |
+
"Llama-3.1-70B-Instruct_tnr": Field("Llama-3.1-70B Subset TNR", "rate", displayed_by_default=False),
|
229 |
+
"Llama-3.1-70B-Instruct_ppv": Field("Llama-3.1-70B Subset PPV", "rate", displayed_by_default=False),
|
230 |
+
"Llama-3.1-70B-Instruct_npv": Field("Llama-3.1-70B Subset NPV", "rate", displayed_by_default=False),
|
231 |
+
"Qwen2.5-72B-Instruct_f1": Field("Qwen2.5-72B Subset F1", "rate", tags=["splits"]),
|
232 |
+
"Qwen2.5-72B-Instruct_tpr": Field("Qwen2.5-72B Subset TPR", "rate", displayed_by_default=False),
|
233 |
+
"Qwen2.5-72B-Instruct_tnr": Field("Qwen2.5-72B Subset TNR", "rate", displayed_by_default=False),
|
234 |
+
"Qwen2.5-72B-Instruct_ppv": Field("Qwen2.5-72B Subset PPV", "rate", displayed_by_default=False),
|
235 |
+
"Qwen2.5-72B-Instruct_npv": Field("Qwen2.5-72B Subset NPV", "rate", displayed_by_default=False),
|
236 |
}
|
237 |
U_MATH_AND_MU_MATH_COLUMNS_DICT = {
|
238 |
"u_math_rank": Field("U-MATH Rank", "number", never_hidden=True),
|
239 |
"mu_math_rank": Field("μ-MATH Rank", "number", never_hidden=True),
|
240 |
**MODEL_COLUMNS_DICT,
|
241 |
+
"u_math_acc": Field("U-MATH Acc", "rate", tags=["main", "u_math", "mu_math"]),
|
242 |
+
"u_math_text_acc": Field("U-MATH Text Acc", "rate", displayed_by_default=False, tags=["u_math"]),
|
243 |
+
"u_math_visual_acc": Field("U-MATH Visual Acc", "rate", displayed_by_default=False, tags=["u_math"]),
|
244 |
"judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
|
245 |
"extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
|
246 |
+
"mu_math_f1": Field("μ-MATH F1", "rate", tags=["main", "u_math", "mu_math"]),
|
247 |
+
"mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["mu_math"]),
|
248 |
+
"mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["mu_math"]),
|
249 |
+
"mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["mu_math"]),
|
250 |
+
"mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["mu_math"]),
|
251 |
}
|
252 |
|
253 |
|
|
|
314 |
df_meta = get_model_meta_info_df(df["full_model_name"].unique())
|
315 |
df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
|
316 |
|
317 |
+
# resolve rate columns to percent
|
318 |
+
for col in df.columns:
|
319 |
+
if U_MATH_COLUMNS_DICT[col].column_type == "rate":
|
320 |
+
if all(df[col] <= 1):
|
321 |
+
df[col] = (df[col] * 100).round(PERCENT_ROUND_DIGITS)
|
322 |
+
elif any(df[col] > 1) and all(df[col] <= 100):
|
323 |
+
df[col] = df[col].round(PERCENT_ROUND_DIGITS)
|
324 |
+
else:
|
325 |
+
raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")
|
326 |
+
|
327 |
# convert to pretty names and sort columns by order in dict
|
328 |
if use_pretty_names:
|
329 |
df = df[U_MATH_COLUMNS_DICT.keys()]
|
|
|
369 |
df_meta = get_model_meta_info_df(df["full_model_name"].unique())
|
370 |
df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
|
371 |
|
372 |
+
# resolve rate columns to percent
|
373 |
+
for col in df.columns:
|
374 |
+
if MU_MATH_COLUMNS_DICT[col].column_type == "rate":
|
375 |
+
if all(df[col] <= 1):
|
376 |
+
df[col] = (df[col] * 100).round(2)
|
377 |
+
elif any(df[col] > 1) and all(df[col] <= 100):
|
378 |
+
df[col] = df[col].round(2)
|
379 |
+
else:
|
380 |
+
raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")
|
381 |
+
|
382 |
# convert to pretty names and sort columns by order in dict
|
383 |
if use_pretty_names:
|
384 |
df = df[MU_MATH_COLUMNS_DICT.keys()]
|