Spaces:
Running
Running
Konstantin Chernyshev
committed on
Commit
·
ea5ca15
1
Parent(s):
f7d4dba
chore: remove ScatterPlot -s
Browse files
app.py
CHANGED
@@ -216,46 +216,46 @@ with demo:
|
|
216 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
217 |
with gr.TabItem("🏆 U-MATH", elem_id="u-math-benchmark-tab-table", id=0):
|
218 |
leaderboard_umath = init_leaderboard(LEADERBOARD_U_MATH_DF, U_MATH_COLUMNS_DICT)
|
219 |
-
gr.ScatterPlot(
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
)
|
229 |
|
230 |
with gr.TabItem("🏅 μ-MATH (Meta-Benchmark)", elem_id="mu-math-benchmark-tab-table", id=1):
|
231 |
leaderboard_mumath = init_leaderboard(LEADERBOARD_MU_MATH_DF, MU_MATH_COLUMNS_DICT)
|
232 |
-
gr.ScatterPlot(
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
)
|
242 |
|
243 |
with gr.TabItem("📊 U-MATH vs μ-MATH", elem_id="u-math-vs-mu-math-tab-table", id=2):
|
244 |
leaderboard_aggregated = init_leaderboard(LEADERBOARD_U_MATH_MU_MATH_JOINED_DF, U_MATH_AND_MU_MATH_COLUMNS_DICT)
|
245 |
-
gr.ScatterPlot(
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
)
|
259 |
|
260 |
with gr.TabItem("📝 About", elem_id="about-tab-table", id=3):
|
261 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
216 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
217 |
with gr.TabItem("🏆 U-MATH", elem_id="u-math-benchmark-tab-table", id=0):
|
218 |
leaderboard_umath = init_leaderboard(LEADERBOARD_U_MATH_DF, U_MATH_COLUMNS_DICT)
|
219 |
+
# gr.ScatterPlot(
|
220 |
+
# value=LEADERBOARD_U_MATH_DF,
|
221 |
+
# title="U-MATH: Text vs Visual Accuracy",
|
222 |
+
# x=U_MATH_COLUMNS_DICT["u_math_text_acc"].pretty_name,
|
223 |
+
# y=U_MATH_COLUMNS_DICT["u_math_visual_acc"].pretty_name,
|
224 |
+
# color=U_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
225 |
+
# tooltip=[U_MATH_COLUMNS_DICT["full_model_name"].pretty_name, U_MATH_COLUMNS_DICT["u_math_acc"].pretty_name],
|
226 |
+
# elem_classes="scatter-plot",
|
227 |
+
# height=500,
|
228 |
+
# )
|
229 |
|
230 |
with gr.TabItem("🏅 μ-MATH (Meta-Benchmark)", elem_id="mu-math-benchmark-tab-table", id=1):
|
231 |
leaderboard_mumath = init_leaderboard(LEADERBOARD_MU_MATH_DF, MU_MATH_COLUMNS_DICT)
|
232 |
+
# gr.ScatterPlot(
|
233 |
+
# value=LEADERBOARD_MU_MATH_DF,
|
234 |
+
# title="μ-MATH: True Positive Rate (Recall) vs True Negative Rate (Specificity)",
|
235 |
+
# x=MU_MATH_COLUMNS_DICT["mu_math_tpr"].pretty_name,
|
236 |
+
# y=MU_MATH_COLUMNS_DICT["mu_math_tnr"].pretty_name,
|
237 |
+
# color=MU_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
238 |
+
# tooltip=[MU_MATH_COLUMNS_DICT["full_model_name"].pretty_name, MU_MATH_COLUMNS_DICT["mu_math_f1"].pretty_name],
|
239 |
+
# elem_classes="scatter-plot",
|
240 |
+
# height=500,
|
241 |
+
# )
|
242 |
|
243 |
with gr.TabItem("📊 U-MATH vs μ-MATH", elem_id="u-math-vs-mu-math-tab-table", id=2):
|
244 |
leaderboard_aggregated = init_leaderboard(LEADERBOARD_U_MATH_MU_MATH_JOINED_DF, U_MATH_AND_MU_MATH_COLUMNS_DICT)
|
245 |
+
# gr.ScatterPlot(
|
246 |
+
# value=LEADERBOARD_U_MATH_MU_MATH_JOINED_DF,
|
247 |
+
# title="U-MATH Accuracy (Solving) vs μ-MATH F1 Score (Judging)",
|
248 |
+
# x=U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_acc"].pretty_name,
|
249 |
+
# y=U_MATH_AND_MU_MATH_COLUMNS_DICT["mu_math_f1"].pretty_name,
|
250 |
+
# color=U_MATH_AND_MU_MATH_COLUMNS_DICT["model_family"].pretty_name,
|
251 |
+
# tooltip=[
|
252 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["full_model_name"].pretty_name,
|
253 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_text_acc"].pretty_name,
|
254 |
+
# U_MATH_AND_MU_MATH_COLUMNS_DICT["u_math_visual_acc"].pretty_name,
|
255 |
+
# ],
|
256 |
+
# elem_classes="scatter-plot",
|
257 |
+
# height=500,
|
258 |
+
# )
|
259 |
|
260 |
with gr.TabItem("📝 About", elem_id="about-tab-table", id=3):
|
261 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|