cogwheelhead committed on
Commit
c0d99c1
·
verified ·
1 Parent(s): 88c7969

fix: rescale gemini2 flash-thinking u-math scores

Browse files
Files changed (1) hide show
  1. data/u_math_eval_results.json +21 -21
data/u_math_eval_results.json CHANGED
@@ -3,39 +3,39 @@
3
  "model_name": "gemini-2.0-flash-thinking-exp-01-21",
4
  "judge_model_name": "gpt-4o-2024-08-06",
5
  "u_math": [
6
- 0.74,
7
- 0.78,
8
- 0.55
9
  ],
10
  "multivariable_calculus": [
11
- 0.74,
12
- 0.74,
13
- 0.75
14
  ],
15
  "integral_calc": [
16
- 0.55,
17
- 0.55,
18
- 0.57
19
  ],
20
  "sequences_series": [
21
- 0.79,
22
- 0.8,
23
- 0.25
24
  ],
25
  "differential_calc": [
26
- 0.62,
27
- 0.69,
28
- 0.47
29
  ],
30
  "algebra": [
31
- 0.87,
32
- 0.93,
33
- 0.57
34
  ],
35
  "precalculus_review": [
36
- 0.93,
37
- 0.95,
38
- 0.5
39
  ]
40
  },
41
  {
 
3
  "model_name": "gemini-2.0-flash-thinking-exp-01-21",
4
  "judge_model_name": "gpt-4o-2024-08-06",
5
  "u_math": [
6
+ 73.64,
7
+ 77.78,
8
+ 55.0
9
  ],
10
  "multivariable_calculus": [
11
+ 74.16,
12
+ 74.0,
13
+ 75.0
14
  ],
15
  "integral_calc": [
16
+ 55.29,
17
+ 54.67,
18
+ 56.9
19
  ],
20
  "sequences_series": [
21
+ 78.57,
22
+ 80.0,
23
+ 25.0
24
  ],
25
  "differential_calc": [
26
+ 62.27,
27
+ 69.33,
28
+ 47.14
29
  ],
30
  "algebra": [
31
+ 87.22,
32
+ 93.33,
33
+ 56.67
34
  ],
35
  "precalculus_review": [
36
+ 92.5,
37
+ 95.33,
38
+ 50.0
39
  ]
40
  },
41
  {