Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ DESCRIPTION = f"""
|
|
| 10 |
Evaluation of open-r1 models across a diverse range of benchmarks from [LightEval](https://github.com/huggingface/lighteval). All scores are reported as accuracy.
|
| 11 |
"""
|
| 12 |
|
| 13 |
-
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5", "mini_math_v2"]
|
| 14 |
|
| 15 |
|
| 16 |
def get_leaderboard_df():
|
|
|
|
| 10 |
Evaluation of open-r1 models across a diverse range of benchmarks from [LightEval](https://github.com/huggingface/lighteval). All scores are reported as accuracy.
|
| 11 |
"""
|
| 12 |
|
| 13 |
+
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5", "mini_math_v2", "aime25:part1", "aime25_part1", "gpqa"]
|
| 14 |
|
| 15 |
|
| 16 |
def get_leaderboard_df():
|