Konstantin Chernyshev committed on
Commit
c6356a2
·
1 Parent(s): 148c1e7

feat: auto convert values to percentage

Browse files
Files changed (2) hide show
  1. app.py +2 -3
  2. src/populate.py +83 -55
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- from typing import Any
3
 
4
  import gradio as gr
5
  import pandas as pd
@@ -153,13 +152,13 @@ def init_leaderboard(dataframe: pd.DataFrame, columns_dict: dict[str, Field]) ->
153
  # create the hidden and visible dataframes to display
154
  hidden_leaderboard_df = gr.components.Dataframe(
155
  value=dataframe,
156
- datatype=[c.column_type for c in columns_dict.values()],
157
  visible=False,
158
  interactive=False,
159
  )
160
  leaderboard_df = gr.components.Dataframe(
161
  value=dataframe[[c.pretty_name for c in columns_dict.values() if c.displayed_by_default]],
162
- datatype=[c.column_type for c in columns_dict.values()],
163
  elem_id="leaderboard-df",
164
  interactive=False,
165
  )
 
1
  import os
 
2
 
3
  import gradio as gr
4
  import pandas as pd
 
152
  # create the hidden and visible dataframes to display
153
  hidden_leaderboard_df = gr.components.Dataframe(
154
  value=dataframe,
155
+ datatype=[c.gradio_column_type for c in columns_dict.values()],
156
  visible=False,
157
  interactive=False,
158
  )
159
  leaderboard_df = gr.components.Dataframe(
160
  value=dataframe[[c.pretty_name for c in columns_dict.values() if c.displayed_by_default]],
161
+ datatype=[c.gradio_column_type for c in columns_dict.values()],
162
  elem_id="leaderboard-df",
163
  interactive=False,
164
  )
src/populate.py CHANGED
@@ -8,6 +8,7 @@ from transformers import AutoConfig
8
 
9
 
10
  UNKNOWN_MODEL_SHOW_SIZE = 150
 
11
 
12
 
13
  def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
@@ -137,12 +138,18 @@ def get_hf_data_by_model_name(model_name: str) -> dict:
137
  @dataclass
138
  class Field:
139
  pretty_name: str
140
- column_type: str
141
  displayed_by_default: bool = True
142
  never_hidden: bool = False
143
  fully_hidden: bool = False
144
  tags: list[str] = field(default_factory=list)
145
 
 
 
 
 
 
 
146
 
147
  MODEL_COLUMNS_DICT = {
148
  "model_type_symbol": Field("T", "str", never_hidden=True),
@@ -155,44 +162,45 @@ MODEL_COLUMNS_DICT = {
155
  "model_architecture": Field("Architecture", "str", displayed_by_default=False),
156
  "model_license": Field("License", "markdown", displayed_by_default=False),
157
  "model_family": Field("Family", "str", displayed_by_default=False),
 
158
  }
159
 
160
  U_MATH_COLUMNS_DICT = {
161
  "rank": Field("Rank", "number", never_hidden=True),
162
  **MODEL_COLUMNS_DICT,
163
  "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
164
- "u_math_acc": Field("U-MATH Acc", "number", never_hidden=True, tags=["u_math"]),
165
- "u_math_text_acc": Field("U-MATH Text Acc", "number", tags=["u_math", "text"]),
166
- "u_math_visual_acc": Field("U-MATH Visual Acc", "number", tags=["u_math", "visual"]),
167
- "differential_calc_acc": Field("Diff Calc Acc", "number", displayed_by_default=False, tags=["subjects"]),
168
- "differential_calc_text_acc": Field("Diff Calc Text Acc", "number", displayed_by_default=False, tags=["text"]),
169
  "differential_calc_visual_acc": Field(
170
- "Diff Calc Visual Acc", "number", displayed_by_default=False, tags=["visual"]
171
  ),
172
- "integral_calc_acc": Field("Integral Calc Acc", "number", displayed_by_default=False, tags=["subjects"]),
173
- "integral_calc_text_acc": Field("Integral Calc Text Acc", "number", displayed_by_default=False, tags=["text"]),
174
  "integral_calc_visual_acc": Field(
175
- "Integral Calc Visual Acc", "number", displayed_by_default=False, tags=["visual"]
176
  ),
177
- "algebra_acc": Field("Algebra Acc", "number", displayed_by_default=False, tags=["subjects"]),
178
- "algebra_text_acc": Field("Algebra Text Acc", "number", displayed_by_default=False, tags=["text"]),
179
- "algebra_visual_acc": Field("Algebra Visual Acc", "number", displayed_by_default=False, tags=["visual"]),
180
- "multivariable_calculus_acc": Field("Multivar Calc Acc", "number", displayed_by_default=False, tags=["subjects"]),
181
  "multivariable_calculus_text_acc": Field(
182
- "Multivar Calc Text Acc", "number", displayed_by_default=False, tags=["text"]
183
  ),
184
  "multivariable_calculus_visual_acc": Field(
185
- "Multivar Calc Visual Acc", "number", displayed_by_default=False, tags=["visual"]
186
  ),
187
- "precalculus_review_acc": Field("Precalc Acc", "number", displayed_by_default=False, tags=["subjects"]),
188
- "precalculus_review_text_acc": Field("Precalc Text Acc", "number", displayed_by_default=False, tags=["text"]),
189
  "precalculus_review_visual_acc": Field(
190
- "Precalc Visual Acc", "number", displayed_by_default=False, tags=["visual"]
191
  ),
192
- "sequences_series_acc": Field("Seq & Series Acc", "number", displayed_by_default=False, tags=["subjects"]),
193
- "sequences_series_text_acc": Field("Seq & Series Text Acc", "number", displayed_by_default=False, tags=["text"]),
194
  "sequences_series_visual_acc": Field(
195
- "Seq & Series Visual Acc", "number", displayed_by_default=False, tags=["visual"]
196
  ),
197
  }
198
 
@@ -200,46 +208,46 @@ MU_MATH_COLUMNS_DICT = {
200
  "rank": Field("Rank", "number", never_hidden=True),
201
  **MODEL_COLUMNS_DICT,
202
  "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
203
- "mu_math_f1": Field("μ-MATH F1", "number", never_hidden=True, tags=["mu_math", "splits"]),
204
- "mu_math_tpr": Field("μ-MATH TPR", "number", displayed_by_default=False, tags=["mu_math"]),
205
- "mu_math_tnr": Field("μ-MATH TNR", "number", displayed_by_default=False, tags=["mu_math"]),
206
- "mu_math_ppv": Field("μ-MATH PPV", "number", displayed_by_default=False, tags=["mu_math"]),
207
- "mu_math_npv": Field("μ-MATH NPV", "number", displayed_by_default=False, tags=["mu_math"]),
208
- "GPT-4o_f1": Field("GPT-4o Subset F1", "number", tags=["splits"]),
209
- "GPT-4o_tpr": Field("GPT-4o Subset TPR", "number", displayed_by_default=False),
210
- "GPT-4o_tnr": Field("GPT-4o Subset TNR", "number", displayed_by_default=False),
211
- "GPT-4o_ppv": Field("GPT-4o Subset PPV", "number", displayed_by_default=False),
212
- "GPT-4o_npv": Field("GPT-4o Subset NPV", "number", displayed_by_default=False),
213
- "Gemini-1.5-Pro_f1": Field("Gemini-1.5-Pro Subset F1", "number", tags=["splits"]),
214
- "Gemini-1.5-Pro_tpr": Field("Gemini-1.5-Pro Subset TPR", "number", displayed_by_default=False),
215
- "Gemini-1.5-Pro_tnr": Field("Gemini-1.5-Pro Subset TNR", "number", displayed_by_default=False),
216
- "Gemini-1.5-Pro_ppv": Field("Gemini-1.5-Pro Subset PPV", "number", displayed_by_default=False),
217
- "Gemini-1.5-Pro_npv": Field("Gemini-1.5-Pro Subset NPV", "number", displayed_by_default=False),
218
- "Llama-3.1-70B-Instruct_f1": Field("Llama-3.1-70B Subset F1", "number", tags=["splits"]),
219
- "Llama-3.1-70B-Instruct_tpr": Field("Llama-3.1-70B Subset TPR", "number", displayed_by_default=False),
220
- "Llama-3.1-70B-Instruct_tnr": Field("Llama-3.1-70B Subset TNR", "number", displayed_by_default=False),
221
- "Llama-3.1-70B-Instruct_ppv": Field("Llama-3.1-70B Subset PPV", "number", displayed_by_default=False),
222
- "Llama-3.1-70B-Instruct_npv": Field("Llama-3.1-70B Subset NPV", "number", displayed_by_default=False),
223
- "Qwen2.5-72B-Instruct_f1": Field("Qwen2.5-72B Subset F1", "number", tags=["splits"]),
224
- "Qwen2.5-72B-Instruct_tpr": Field("Qwen2.5-72B Subset TPR", "number", displayed_by_default=False),
225
- "Qwen2.5-72B-Instruct_tnr": Field("Qwen2.5-72B Subset TNR", "number", displayed_by_default=False),
226
- "Qwen2.5-72B-Instruct_ppv": Field("Qwen2.5-72B Subset PPV", "number", displayed_by_default=False),
227
- "Qwen2.5-72B-Instruct_npv": Field("Qwen2.5-72B Subset NPV", "number", displayed_by_default=False),
228
  }
229
  U_MATH_AND_MU_MATH_COLUMNS_DICT = {
230
  "u_math_rank": Field("U-MATH Rank", "number", never_hidden=True),
231
  "mu_math_rank": Field("μ-MATH Rank", "number", never_hidden=True),
232
  **MODEL_COLUMNS_DICT,
233
- "u_math_acc": Field("U-MATH Acc", "number", tags=["main", "u_math", "mu_math"]),
234
- "u_math_text_acc": Field("U-MATH Text Acc", "number", displayed_by_default=False, tags=["u_math"]),
235
- "u_math_visual_acc": Field("U-MATH Visual Acc", "number", displayed_by_default=False, tags=["u_math"]),
236
  "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
237
  "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
238
- "mu_math_f1": Field("μ-MATH F1", "number", tags=["main", "u_math", "mu_math"]),
239
- "mu_math_tpr": Field("μ-MATH TPR", "number", displayed_by_default=False, tags=["mu_math"]),
240
- "mu_math_tnr": Field("μ-MATH TNR", "number", displayed_by_default=False, tags=["mu_math"]),
241
- "mu_math_ppv": Field("μ-MATH PPV", "number", displayed_by_default=False, tags=["mu_math"]),
242
- "mu_math_npv": Field("μ-MATH NPV", "number", displayed_by_default=False, tags=["mu_math"]),
243
  }
244
 
245
 
@@ -306,6 +314,16 @@ def get_u_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = Tr
306
  df_meta = get_model_meta_info_df(df["full_model_name"].unique())
307
  df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
308
 
 
 
 
 
 
 
 
 
 
 
309
  # convert to pretty names and sort columns by order in dict
310
  if use_pretty_names:
311
  df = df[U_MATH_COLUMNS_DICT.keys()]
@@ -351,6 +369,16 @@ def get_mu_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = T
351
  df_meta = get_model_meta_info_df(df["full_model_name"].unique())
352
  df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
353
 
 
 
 
 
 
 
 
 
 
 
354
  # convert to pretty names and sort columns by order in dict
355
  if use_pretty_names:
356
  df = df[MU_MATH_COLUMNS_DICT.keys()]
 
8
 
9
 
10
  UNKNOWN_MODEL_SHOW_SIZE = 150
11
+ PERCENT_ROUND_DIGITS = 1
12
 
13
 
14
  def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
 
138
  @dataclass
139
  class Field:
140
  pretty_name: str
141
+ column_type: str # rate (auto-convert to percent number), number, str, markdown
142
  displayed_by_default: bool = True
143
  never_hidden: bool = False
144
  fully_hidden: bool = False
145
  tags: list[str] = field(default_factory=list)
146
 
147
+ @property
148
+ def gradio_column_type(self) -> str:
149
+ if self.column_type == "rate":
150
+ return "number"
151
+ return self.column_type
152
+
153
 
154
  MODEL_COLUMNS_DICT = {
155
  "model_type_symbol": Field("T", "str", never_hidden=True),
 
162
  "model_architecture": Field("Architecture", "str", displayed_by_default=False),
163
  "model_license": Field("License", "markdown", displayed_by_default=False),
164
  "model_family": Field("Family", "str", displayed_by_default=False),
165
+ "model_url": Field("Model URL", "str", fully_hidden=True, displayed_by_default=False),
166
  }
167
 
168
  U_MATH_COLUMNS_DICT = {
169
  "rank": Field("Rank", "number", never_hidden=True),
170
  **MODEL_COLUMNS_DICT,
171
  "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
172
+ "u_math_acc": Field("U-MATH Acc", "rate", never_hidden=True, tags=["u_math"]),
173
+ "u_math_text_acc": Field("U-MATH Text Acc", "rate", tags=["u_math", "text"]),
174
+ "u_math_visual_acc": Field("U-MATH Visual Acc", "rate", tags=["u_math", "visual"]),
175
+ "differential_calc_acc": Field("Diff Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
176
+ "differential_calc_text_acc": Field("Diff Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
177
  "differential_calc_visual_acc": Field(
178
+ "Diff Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
179
  ),
180
+ "integral_calc_acc": Field("Integral Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
181
+ "integral_calc_text_acc": Field("Integral Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
182
  "integral_calc_visual_acc": Field(
183
+ "Integral Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
184
  ),
185
+ "algebra_acc": Field("Algebra Acc", "rate", displayed_by_default=False, tags=["subjects"]),
186
+ "algebra_text_acc": Field("Algebra Text Acc", "rate", displayed_by_default=False, tags=["text"]),
187
+ "algebra_visual_acc": Field("Algebra Visual Acc", "rate", displayed_by_default=False, tags=["visual"]),
188
+ "multivariable_calculus_acc": Field("Multivar Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
189
  "multivariable_calculus_text_acc": Field(
190
+ "Multivar Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]
191
  ),
192
  "multivariable_calculus_visual_acc": Field(
193
+ "Multivar Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
194
  ),
195
+ "precalculus_review_acc": Field("Precalc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
196
+ "precalculus_review_text_acc": Field("Precalc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
197
  "precalculus_review_visual_acc": Field(
198
+ "Precalc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
199
  ),
200
+ "sequences_series_acc": Field("Seq & Series Acc", "rate", displayed_by_default=False, tags=["subjects"]),
201
+ "sequences_series_text_acc": Field("Seq & Series Text Acc", "rate", displayed_by_default=False, tags=["text"]),
202
  "sequences_series_visual_acc": Field(
203
+ "Seq & Series Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
204
  ),
205
  }
206
 
 
208
  "rank": Field("Rank", "number", never_hidden=True),
209
  **MODEL_COLUMNS_DICT,
210
  "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
211
+ "mu_math_f1": Field("μ-MATH F1", "rate", never_hidden=True, tags=["mu_math", "splits"]),
212
+ "mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["mu_math"]),
213
+ "mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["mu_math"]),
214
+ "mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["mu_math"]),
215
+ "mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["mu_math"]),
216
+ "GPT-4o_f1": Field("GPT-4o Subset F1", "rate", tags=["splits"]),
217
+ "GPT-4o_tpr": Field("GPT-4o Subset TPR", "rate", displayed_by_default=False),
218
+ "GPT-4o_tnr": Field("GPT-4o Subset TNR", "rate", displayed_by_default=False),
219
+ "GPT-4o_ppv": Field("GPT-4o Subset PPV", "rate", displayed_by_default=False),
220
+ "GPT-4o_npv": Field("GPT-4o Subset NPV", "rate", displayed_by_default=False),
221
+ "Gemini-1.5-Pro_f1": Field("Gemini-1.5-Pro Subset F1", "rate", tags=["splits"]),
222
+ "Gemini-1.5-Pro_tpr": Field("Gemini-1.5-Pro Subset TPR", "rate", displayed_by_default=False),
223
+ "Gemini-1.5-Pro_tnr": Field("Gemini-1.5-Pro Subset TNR", "rate", displayed_by_default=False),
224
+ "Gemini-1.5-Pro_ppv": Field("Gemini-1.5-Pro Subset PPV", "rate", displayed_by_default=False),
225
+ "Gemini-1.5-Pro_npv": Field("Gemini-1.5-Pro Subset NPV", "rate", displayed_by_default=False),
226
+ "Llama-3.1-70B-Instruct_f1": Field("Llama-3.1-70B Subset F1", "rate", tags=["splits"]),
227
+ "Llama-3.1-70B-Instruct_tpr": Field("Llama-3.1-70B Subset TPR", "rate", displayed_by_default=False),
228
+ "Llama-3.1-70B-Instruct_tnr": Field("Llama-3.1-70B Subset TNR", "rate", displayed_by_default=False),
229
+ "Llama-3.1-70B-Instruct_ppv": Field("Llama-3.1-70B Subset PPV", "rate", displayed_by_default=False),
230
+ "Llama-3.1-70B-Instruct_npv": Field("Llama-3.1-70B Subset NPV", "rate", displayed_by_default=False),
231
+ "Qwen2.5-72B-Instruct_f1": Field("Qwen2.5-72B Subset F1", "rate", tags=["splits"]),
232
+ "Qwen2.5-72B-Instruct_tpr": Field("Qwen2.5-72B Subset TPR", "rate", displayed_by_default=False),
233
+ "Qwen2.5-72B-Instruct_tnr": Field("Qwen2.5-72B Subset TNR", "rate", displayed_by_default=False),
234
+ "Qwen2.5-72B-Instruct_ppv": Field("Qwen2.5-72B Subset PPV", "rate", displayed_by_default=False),
235
+ "Qwen2.5-72B-Instruct_npv": Field("Qwen2.5-72B Subset NPV", "rate", displayed_by_default=False),
236
  }
237
  U_MATH_AND_MU_MATH_COLUMNS_DICT = {
238
  "u_math_rank": Field("U-MATH Rank", "number", never_hidden=True),
239
  "mu_math_rank": Field("μ-MATH Rank", "number", never_hidden=True),
240
  **MODEL_COLUMNS_DICT,
241
+ "u_math_acc": Field("U-MATH Acc", "rate", tags=["main", "u_math", "mu_math"]),
242
+ "u_math_text_acc": Field("U-MATH Text Acc", "rate", displayed_by_default=False, tags=["u_math"]),
243
+ "u_math_visual_acc": Field("U-MATH Visual Acc", "rate", displayed_by_default=False, tags=["u_math"]),
244
  "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
245
  "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
246
+ "mu_math_f1": Field("μ-MATH F1", "rate", tags=["main", "u_math", "mu_math"]),
247
+ "mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["mu_math"]),
248
+ "mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["mu_math"]),
249
+ "mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["mu_math"]),
250
+ "mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["mu_math"]),
251
  }
252
 
253
 
 
314
  df_meta = get_model_meta_info_df(df["full_model_name"].unique())
315
  df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
316
 
317
+ # resolve rate columns to percent
318
+ for col in df.columns:
319
+ if U_MATH_COLUMNS_DICT[col].column_type == "rate":
320
+ if all(df[col] <= 1):
321
+ df[col] = (df[col] * 100).round(PERCENT_ROUND_DIGITS)
322
+ elif any(df[col] > 1) and all(df[col] <= 100):
323
+ df[col] = df[col].round(PERCENT_ROUND_DIGITS)
324
+ else:
325
+ raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")
326
+
327
  # convert to pretty names and sort columns by order in dict
328
  if use_pretty_names:
329
  df = df[U_MATH_COLUMNS_DICT.keys()]
 
369
  df_meta = get_model_meta_info_df(df["full_model_name"].unique())
370
  df = pd.merge(df, df_meta, on=["full_model_name"], how="left")
371
 
372
+ # resolve rate columns to percent
373
+ for col in df.columns:
374
+ if MU_MATH_COLUMNS_DICT[col].column_type == "rate":
375
+ if all(df[col] <= 1):
376
+ df[col] = (df[col] * 100).round(2)
377
+ elif any(df[col] > 1) and all(df[col] <= 100):
378
+ df[col] = df[col].round(2)
379
+ else:
380
+ raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")
381
+
382
  # convert to pretty names and sort columns by order in dict
383
  if use_pretty_names:
384
  df = df[MU_MATH_COLUMNS_DICT.keys()]