Muennighoff commited on
Commit
5c44cd5
·
1 Parent(s): f6fa33e
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_0.json +87 -0
  2. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_0_lm-eval_global_step52452_2023-02-13-10-25-20_0shots_backup.json +87 -0
  3. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_1.json +87 -0
  4. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_1_lm-eval_global_step52452_2023-02-13-10-25-19_1shots_backup.json +87 -0
  5. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_2.json +87 -0
  6. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_2_lm-eval_global_step52452_2023-02-13-10-25-19_2shots_backup.json +87 -0
  7. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_3.json +87 -0
  8. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_3_lm-eval_global_step52452_2023-02-13-10-25-19_3shots_backup.json +87 -0
  9. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_4.json +87 -0
  10. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_4_lm-eval_global_step52452_2023-02-13-10-25-19_4shots_backup.json +87 -0
  11. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_5.json +87 -0
  12. 2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_5_lm-eval_global_step52452_2023-02-13-10-25-19_5shots_backup.json +87 -0
  13. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  14. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  15. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  16. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  17. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  18. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  19. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  20. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  21. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  22. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  23. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  24. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  25. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  26. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  27. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  28. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  29. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  30. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  31. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  32. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt +3 -0
  33. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt +3 -0
  34. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt +3 -0
  35. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  36. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt +3 -0
  37. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt +3 -0
  38. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt +3 -0
  39. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt +3 -0
  40. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt +3 -0
  41. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt +3 -0
  42. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt +3 -0
  43. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt +3 -0
  44. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt +3 -0
  45. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt +3 -0
  46. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  47. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt +3 -0
  48. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt +3 -0
  49. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt +3 -0
  50. 2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt +3 -0
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_0.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.336,
5
+ "acc_stderr": 0.014944140233795023
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.334,
9
+ "acc_stderr": 0.014922019523732963
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767787
14
+ },
15
+ "cb": {
16
+ "acc": 0.48214285714285715,
17
+ "acc_stderr": 0.0673769750864465,
18
+ "f1": 0.30810810810810807
19
+ },
20
+ "copa": {
21
+ "acc": 0.77,
22
+ "acc_stderr": 0.04229525846816506
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4366660027882892,
26
+ "acc_stderr": 0.0049495895676788925,
27
+ "acc_norm": 0.5616411073491336,
28
+ "acc_norm_stderr": 0.00495171762200798
29
+ },
30
+ "rte": {
31
+ "acc": 0.5379061371841155,
32
+ "acc_stderr": 0.030009848912529117
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5611681136543015,
36
+ "acc_stderr": 0.013946933444507034
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6916087653661144,
40
+ "acc_stderr": 0.0106797344454878
41
+ },
42
+ "boolq": {
43
+ "acc": 0.6122324159021407,
44
+ "acc_stderr": 0.0085219003280139
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5627104377104377,
48
+ "acc_stderr": 0.010178768429321602,
49
+ "acc_norm": 0.5088383838383839,
50
+ "acc_norm_stderr": 0.010258180468004831
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2568259385665529,
54
+ "acc_stderr": 0.0127669237941168,
55
+ "acc_norm": 0.27303754266211605,
56
+ "acc_norm_stderr": 0.01301933276263575
57
+ },
58
+ "sciq": {
59
+ "acc": 0.799,
60
+ "acc_stderr": 0.012679107214617324,
61
+ "acc_norm": 0.735,
62
+ "acc_norm_stderr": 0.013963164754809953
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7437431991294886,
66
+ "acc_stderr": 0.010185787831565062,
67
+ "acc_norm": 0.7459194776931447,
68
+ "acc_norm_stderr": 0.010157271999135053
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_0_lm-eval_global_step52452_2023-02-13-10-25-20_0shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.336,
5
+ "acc_stderr": 0.014944140233795023
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.334,
9
+ "acc_stderr": 0.014922019523732963
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767787
14
+ },
15
+ "cb": {
16
+ "acc": 0.48214285714285715,
17
+ "acc_stderr": 0.0673769750864465,
18
+ "f1": 0.30810810810810807
19
+ },
20
+ "copa": {
21
+ "acc": 0.77,
22
+ "acc_stderr": 0.04229525846816506
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4366660027882892,
26
+ "acc_stderr": 0.0049495895676788925,
27
+ "acc_norm": 0.5616411073491336,
28
+ "acc_norm_stderr": 0.00495171762200798
29
+ },
30
+ "rte": {
31
+ "acc": 0.5379061371841155,
32
+ "acc_stderr": 0.030009848912529117
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5611681136543015,
36
+ "acc_stderr": 0.013946933444507034
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6916087653661144,
40
+ "acc_stderr": 0.0106797344454878
41
+ },
42
+ "boolq": {
43
+ "acc": 0.6122324159021407,
44
+ "acc_stderr": 0.0085219003280139
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5627104377104377,
48
+ "acc_stderr": 0.010178768429321602,
49
+ "acc_norm": 0.5088383838383839,
50
+ "acc_norm_stderr": 0.010258180468004831
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2568259385665529,
54
+ "acc_stderr": 0.0127669237941168,
55
+ "acc_norm": 0.27303754266211605,
56
+ "acc_norm_stderr": 0.01301933276263575
57
+ },
58
+ "sciq": {
59
+ "acc": 0.799,
60
+ "acc_stderr": 0.012679107214617324,
61
+ "acc_norm": 0.735,
62
+ "acc_norm_stderr": 0.013963164754809953
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7437431991294886,
66
+ "acc_stderr": 0.010185787831565062,
67
+ "acc_norm": 0.7459194776931447,
68
+ "acc_norm_stderr": 0.010157271999135053
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_1.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.343,
5
+ "acc_stderr": 0.015019206922356951
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.331,
9
+ "acc_stderr": 0.014888272588203938
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35083333333333333,
13
+ "acc_stderr": 0.013782212417178197
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942397,
18
+ "f1": 0.3134878193701723
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4342760406293567,
26
+ "acc_stderr": 0.0049464854665446254,
27
+ "acc_norm": 0.5609440350527783,
28
+ "acc_norm_stderr": 0.0049525768633152155
29
+ },
30
+ "rte": {
31
+ "acc": 0.5523465703971119,
32
+ "acc_stderr": 0.029931070362939526
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5666929755327546,
36
+ "acc_stderr": 0.013926915052757347
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6835916622127205,
40
+ "acc_stderr": 0.010754780097940887
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5712538226299694,
44
+ "acc_stderr": 0.008655800332760226
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5812289562289562,
48
+ "acc_stderr": 0.010123487160167808,
49
+ "acc_norm": 0.5437710437710438,
50
+ "acc_norm_stderr": 0.010220394383722024
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26109215017064846,
54
+ "acc_stderr": 0.012835523909473841,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274783
57
+ },
58
+ "sciq": {
59
+ "acc": 0.871,
60
+ "acc_stderr": 0.010605256784796563,
61
+ "acc_norm": 0.836,
62
+ "acc_norm_stderr": 0.011715000693181325
63
+ },
64
+ "piqa": {
65
+ "acc": 0.735582154515778,
66
+ "acc_stderr": 0.010289787244767172,
67
+ "acc_norm": 0.7301414581066377,
68
+ "acc_norm_stderr": 0.010356595421852195
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_1_lm-eval_global_step52452_2023-02-13-10-25-19_1shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.343,
5
+ "acc_stderr": 0.015019206922356951
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.331,
9
+ "acc_stderr": 0.014888272588203938
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.35083333333333333,
13
+ "acc_stderr": 0.013782212417178197
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942397,
18
+ "f1": 0.3134878193701723
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4342760406293567,
26
+ "acc_stderr": 0.0049464854665446254,
27
+ "acc_norm": 0.5609440350527783,
28
+ "acc_norm_stderr": 0.0049525768633152155
29
+ },
30
+ "rte": {
31
+ "acc": 0.5523465703971119,
32
+ "acc_stderr": 0.029931070362939526
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5666929755327546,
36
+ "acc_stderr": 0.013926915052757347
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6835916622127205,
40
+ "acc_stderr": 0.010754780097940887
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5712538226299694,
44
+ "acc_stderr": 0.008655800332760226
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5812289562289562,
48
+ "acc_stderr": 0.010123487160167808,
49
+ "acc_norm": 0.5437710437710438,
50
+ "acc_norm_stderr": 0.010220394383722024
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26109215017064846,
54
+ "acc_stderr": 0.012835523909473841,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274783
57
+ },
58
+ "sciq": {
59
+ "acc": 0.871,
60
+ "acc_stderr": 0.010605256784796563,
61
+ "acc_norm": 0.836,
62
+ "acc_norm_stderr": 0.011715000693181325
63
+ },
64
+ "piqa": {
65
+ "acc": 0.735582154515778,
66
+ "acc_stderr": 0.010289787244767172,
67
+ "acc_norm": 0.7301414581066377,
68
+ "acc_norm_stderr": 0.010356595421852195
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_2.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.339,
5
+ "acc_stderr": 0.014976758771620347
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.01496596071022448
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3358333333333333,
13
+ "acc_stderr": 0.013639261190932877
14
+ },
15
+ "cb": {
16
+ "acc": 0.3392857142857143,
17
+ "acc_stderr": 0.06384226561930825,
18
+ "f1": 0.24689440993788817
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.0446196043338474
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4325831507667795,
26
+ "acc_stderr": 0.004944215937021391,
27
+ "acc_norm": 0.5642302330213105,
28
+ "acc_norm_stderr": 0.004948439229523912
29
+ },
30
+ "rte": {
31
+ "acc": 0.5234657039711191,
32
+ "acc_stderr": 0.03006330041190266
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5611681136543015,
36
+ "acc_stderr": 0.013946933444507032
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6846606092998396,
40
+ "acc_stderr": 0.010744989116260668
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5651376146788991,
44
+ "acc_stderr": 0.00867052847184156
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5938552188552189,
48
+ "acc_stderr": 0.010077409815364057,
49
+ "acc_norm": 0.5749158249158249,
50
+ "acc_norm_stderr": 0.010143966195717845
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2696245733788396,
54
+ "acc_stderr": 0.012968040686869143,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274776
57
+ },
58
+ "sciq": {
59
+ "acc": 0.886,
60
+ "acc_stderr": 0.010055103435823332,
61
+ "acc_norm": 0.864,
62
+ "acc_norm_stderr": 0.010845350230472988
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7415669205658324,
66
+ "acc_stderr": 0.010213971636773313,
67
+ "acc_norm": 0.7328618063112078,
68
+ "acc_norm_stderr": 0.010323440492612418
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_2_lm-eval_global_step52452_2023-02-13-10-25-19_2shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.339,
5
+ "acc_stderr": 0.014976758771620347
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.01496596071022448
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3358333333333333,
13
+ "acc_stderr": 0.013639261190932877
14
+ },
15
+ "cb": {
16
+ "acc": 0.3392857142857143,
17
+ "acc_stderr": 0.06384226561930825,
18
+ "f1": 0.24689440993788817
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.0446196043338474
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4325831507667795,
26
+ "acc_stderr": 0.004944215937021391,
27
+ "acc_norm": 0.5642302330213105,
28
+ "acc_norm_stderr": 0.004948439229523912
29
+ },
30
+ "rte": {
31
+ "acc": 0.5234657039711191,
32
+ "acc_stderr": 0.03006330041190266
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5611681136543015,
36
+ "acc_stderr": 0.013946933444507032
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6846606092998396,
40
+ "acc_stderr": 0.010744989116260668
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5651376146788991,
44
+ "acc_stderr": 0.00867052847184156
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5938552188552189,
48
+ "acc_stderr": 0.010077409815364057,
49
+ "acc_norm": 0.5749158249158249,
50
+ "acc_norm_stderr": 0.010143966195717845
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.2696245733788396,
54
+ "acc_stderr": 0.012968040686869143,
55
+ "acc_norm": 0.28668941979522183,
56
+ "acc_norm_stderr": 0.013214986329274776
57
+ },
58
+ "sciq": {
59
+ "acc": 0.886,
60
+ "acc_stderr": 0.010055103435823332,
61
+ "acc_norm": 0.864,
62
+ "acc_norm_stderr": 0.010845350230472988
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7415669205658324,
66
+ "acc_stderr": 0.010213971636773313,
67
+ "acc_norm": 0.7328618063112078,
68
+ "acc_norm_stderr": 0.010323440492612418
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_3.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.33,
5
+ "acc_stderr": 0.014876872027456732
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.355,
9
+ "acc_stderr": 0.01513949154378053
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3283333333333333,
13
+ "acc_stderr": 0.01356203291952902
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.33040639423618146
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43178649671380204,
26
+ "acc_stderr": 0.004943127583290518,
27
+ "acc_norm": 0.5643298147779326,
28
+ "acc_norm_stderr": 0.0049483103997460835
29
+ },
30
+ "rte": {
31
+ "acc": 0.5703971119133574,
32
+ "acc_stderr": 0.02979666882912467
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5414364640883977,
36
+ "acc_stderr": 0.014004146853791902
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6910742918225548,
40
+ "acc_stderr": 0.010684853966268454
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5477064220183486,
44
+ "acc_stderr": 0.008705158179072315
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5883838383838383,
48
+ "acc_stderr": 0.01009821864671491,
49
+ "acc_norm": 0.5787037037037037,
50
+ "acc_norm_stderr": 0.010131882498193127
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27303754266211605,
54
+ "acc_stderr": 0.013019332762635743,
55
+ "acc_norm": 0.28071672354948807,
56
+ "acc_norm_stderr": 0.013131238126975576
57
+ },
58
+ "sciq": {
59
+ "acc": 0.881,
60
+ "acc_stderr": 0.010244215145336664,
61
+ "acc_norm": 0.876,
62
+ "acc_norm_stderr": 0.010427498872343961
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.01024873864993558,
67
+ "acc_norm": 0.7393906420021763,
68
+ "acc_norm_stderr": 0.01024182615581163
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_3_lm-eval_global_step52452_2023-02-13-10-25-19_3shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.33,
5
+ "acc_stderr": 0.014876872027456732
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.355,
9
+ "acc_stderr": 0.01513949154378053
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3283333333333333,
13
+ "acc_stderr": 0.01356203291952902
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.33040639423618146
19
+ },
20
+ "copa": {
21
+ "acc": 0.76,
22
+ "acc_stderr": 0.04292346959909283
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43178649671380204,
26
+ "acc_stderr": 0.004943127583290518,
27
+ "acc_norm": 0.5643298147779326,
28
+ "acc_norm_stderr": 0.0049483103997460835
29
+ },
30
+ "rte": {
31
+ "acc": 0.5703971119133574,
32
+ "acc_stderr": 0.02979666882912467
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5414364640883977,
36
+ "acc_stderr": 0.014004146853791902
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6910742918225548,
40
+ "acc_stderr": 0.010684853966268454
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5477064220183486,
44
+ "acc_stderr": 0.008705158179072315
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5883838383838383,
48
+ "acc_stderr": 0.01009821864671491,
49
+ "acc_norm": 0.5787037037037037,
50
+ "acc_norm_stderr": 0.010131882498193127
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.27303754266211605,
54
+ "acc_stderr": 0.013019332762635743,
55
+ "acc_norm": 0.28071672354948807,
56
+ "acc_norm_stderr": 0.013131238126975576
57
+ },
58
+ "sciq": {
59
+ "acc": 0.881,
60
+ "acc_stderr": 0.010244215145336664,
61
+ "acc_norm": 0.876,
62
+ "acc_norm_stderr": 0.010427498872343961
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.01024873864993558,
67
+ "acc_norm": 0.7393906420021763,
68
+ "acc_norm_stderr": 0.01024182615581163
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_4.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.34,
5
+ "acc_stderr": 0.014987482264363937
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.014965960710224489
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33916666666666667,
13
+ "acc_stderr": 0.013672343491681815
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.2631016042780749
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4298944433379805,
26
+ "acc_stderr": 0.004940490508240648,
27
+ "acc_norm": 0.5660227046405099,
28
+ "acc_norm_stderr": 0.004946089230153028
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317184
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5461720599842147,
36
+ "acc_stderr": 0.013992441563707068
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6953500801710315,
40
+ "acc_stderr": 0.0106434269886468
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5314984709480123,
44
+ "acc_stderr": 0.00872768484861531
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5980639730639731,
48
+ "acc_stderr": 0.010060521220920566,
49
+ "acc_norm": 0.5778619528619529,
50
+ "acc_norm_stderr": 0.01013462052459227
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26109215017064846,
54
+ "acc_stderr": 0.012835523909473841,
55
+ "acc_norm": 0.2909556313993174,
56
+ "acc_norm_stderr": 0.013273077865907595
57
+ },
58
+ "sciq": {
59
+ "acc": 0.896,
60
+ "acc_stderr": 0.009658016218524293,
61
+ "acc_norm": 0.873,
62
+ "acc_norm_stderr": 0.010534798620855745
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.010248738649935573,
67
+ "acc_norm": 0.7442872687704026,
68
+ "acc_norm_stderr": 0.010178690109459872
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_4_lm-eval_global_step52452_2023-02-13-10-25-19_4shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.34,
5
+ "acc_stderr": 0.014987482264363937
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.338,
9
+ "acc_stderr": 0.014965960710224489
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33916666666666667,
13
+ "acc_stderr": 0.013672343491681815
14
+ },
15
+ "cb": {
16
+ "acc": 0.375,
17
+ "acc_stderr": 0.06527912098338669,
18
+ "f1": 0.2631016042780749
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.04512608598542127
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.4298944433379805,
26
+ "acc_stderr": 0.004940490508240648,
27
+ "acc_norm": 0.5660227046405099,
28
+ "acc_norm_stderr": 0.004946089230153028
29
+ },
30
+ "rte": {
31
+ "acc": 0.51985559566787,
32
+ "acc_stderr": 0.030072723167317184
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5461720599842147,
36
+ "acc_stderr": 0.013992441563707068
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.6953500801710315,
40
+ "acc_stderr": 0.0106434269886468
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5314984709480123,
44
+ "acc_stderr": 0.00872768484861531
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5980639730639731,
48
+ "acc_stderr": 0.010060521220920566,
49
+ "acc_norm": 0.5778619528619529,
50
+ "acc_norm_stderr": 0.01013462052459227
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26109215017064846,
54
+ "acc_stderr": 0.012835523909473841,
55
+ "acc_norm": 0.2909556313993174,
56
+ "acc_norm_stderr": 0.013273077865907595
57
+ },
58
+ "sciq": {
59
+ "acc": 0.896,
60
+ "acc_stderr": 0.009658016218524293,
61
+ "acc_norm": 0.873,
62
+ "acc_norm_stderr": 0.010534798620855745
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7388465723612623,
66
+ "acc_stderr": 0.010248738649935573,
67
+ "acc_norm": 0.7442872687704026,
68
+ "acc_norm_stderr": 0.010178690109459872
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_5.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.317,
5
+ "acc_stderr": 0.014721675438880229
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.362,
9
+ "acc_stderr": 0.0152048409129195
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767797
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.31333333333333335
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.045126085985421276
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43218482374029077,
26
+ "acc_stderr": 0.004943673388276271,
27
+ "acc_norm": 0.5679147580163314,
28
+ "acc_norm_stderr": 0.004943537242344413
29
+ },
30
+ "rte": {
31
+ "acc": 0.5848375451263538,
32
+ "acc_stderr": 0.02966006629089348
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.55327545382794,
36
+ "acc_stderr": 0.0139724883716167
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.692143238909674,
40
+ "acc_stderr": 0.010674598158758175
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5262996941896024,
44
+ "acc_stderr": 0.008732949144494798
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5934343434343434,
48
+ "acc_stderr": 0.010079056419223523,
49
+ "acc_norm": 0.5803872053872053,
50
+ "acc_norm_stderr": 0.010126315840891536
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26706484641638223,
54
+ "acc_stderr": 0.012928933196496356,
55
+ "acc_norm": 0.2841296928327645,
56
+ "acc_norm_stderr": 0.013179442447653887
57
+ },
58
+ "sciq": {
59
+ "acc": 0.899,
60
+ "acc_stderr": 0.009533618929340997,
61
+ "acc_norm": 0.888,
62
+ "acc_norm_stderr": 0.00997775303139725
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7295973884657236,
66
+ "acc_stderr": 0.010363167031620789,
67
+ "acc_norm": 0.7388465723612623,
68
+ "acc_norm_stderr": 0.010248738649935592
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/evaluation/rankeval/2b855b18bc4seed1_5_lm-eval_global_step52452_2023-02-13-10-25-19_5shots_backup.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.317,
5
+ "acc_stderr": 0.014721675438880229
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.362,
9
+ "acc_stderr": 0.0152048409129195
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.34,
13
+ "acc_stderr": 0.013680495725767797
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.31333333333333335
19
+ },
20
+ "copa": {
21
+ "acc": 0.72,
22
+ "acc_stderr": 0.045126085985421276
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.43218482374029077,
26
+ "acc_stderr": 0.004943673388276271,
27
+ "acc_norm": 0.5679147580163314,
28
+ "acc_norm_stderr": 0.004943537242344413
29
+ },
30
+ "rte": {
31
+ "acc": 0.5848375451263538,
32
+ "acc_stderr": 0.02966006629089348
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.55327545382794,
36
+ "acc_stderr": 0.0139724883716167
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.692143238909674,
40
+ "acc_stderr": 0.010674598158758175
41
+ },
42
+ "boolq": {
43
+ "acc": 0.5262996941896024,
44
+ "acc_stderr": 0.008732949144494798
45
+ },
46
+ "arc_easy": {
47
+ "acc": 0.5934343434343434,
48
+ "acc_stderr": 0.010079056419223523,
49
+ "acc_norm": 0.5803872053872053,
50
+ "acc_norm_stderr": 0.010126315840891536
51
+ },
52
+ "arc_challenge": {
53
+ "acc": 0.26706484641638223,
54
+ "acc_stderr": 0.012928933196496356,
55
+ "acc_norm": 0.2841296928327645,
56
+ "acc_norm_stderr": 0.013179442447653887
57
+ },
58
+ "sciq": {
59
+ "acc": 0.899,
60
+ "acc_stderr": 0.009533618929340997,
61
+ "acc_norm": 0.888,
62
+ "acc_norm_stderr": 0.00997775303139725
63
+ },
64
+ "piqa": {
65
+ "acc": 0.7295973884657236,
66
+ "acc_stderr": 0.010363167031620789,
67
+ "acc_norm": 0.7388465723612623,
68
+ "acc_norm_stderr": 0.010248738649935592
69
+ }
70
+ },
71
+ "versions": {
72
+ "anli_r1": 0,
73
+ "anli_r2": 0,
74
+ "anli_r3": 0,
75
+ "cb": 1,
76
+ "copa": 0,
77
+ "hellaswag": 0,
78
+ "rte": 0,
79
+ "winogrande": 0,
80
+ "storycloze_2016": 0,
81
+ "boolq": 1,
82
+ "arc_easy": 0,
83
+ "arc_challenge": 0,
84
+ "sciq": 0,
85
+ "piqa": 0
86
+ }
87
+ }
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ecb011d419a12f31bb4cde0f62213bb5beb99f0b434a1e56489ec1986c1bce
3
+ size 131677719
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bca7fca8a58e1a907e9007af7b81c54849fcb7822dcdd0e61f9a2e7e23840c4
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ab1c34806ed95f85175da5c0259b532d8a11c7bfbb772525be1f619357af05
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54e4b3629a06a8ceedf72fb378779d1f71115700ce95e883dd072122f8a78b7
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dee71d54fc8e3529d791bb340a80ac30954426c3298fc3e08d0996b061e98f99
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:498b1bb2c78a2e0fc98c660b38d082694758de3c8116cf4735ab9d3d627f3345
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6af14f9d37480b3e872fcd790f7a496f364ec67750ec1d70f9d82f538d09115
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec8d60e0646e1d155ed631221ef6bd3346c72ba479101104bc4522294cb61856
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d39a960726be274a0f7c19bb3053727627a653bcc4c819ef12bb983bc290b8
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14274f62cc967f180a8ebaf19fae3158ef1bbb49f32d56b935dda07a3a28dce8
3
+ size 131677869
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13bc459a3551c90cd30c4e230f4a058f9ebf049b53a1965b3e3cb462120ee410
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dee168da90b8e855c77d3b3c267347143d4ba2aca55e831b3d85f4083f74cd2
3
+ size 131677794
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a5087ad8c8b42d7f0fb54843dded38c408c6e70cee9aa8336dc8b5f50efba8d
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ce9ebc38b1a773630230747a1c253424494dd12f31eb6811906d3152e173cf
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f9a152bd3bcac686b705b4621dbd3e2b29b64bfa8d45fabe2be5da88576759
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3692fc302551ea46e7341625a6d3b9c59ef6049cb4879f0b2d3b532259461923
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e80720bed325a6ad3e8f94ee4db78607968b1a834022e8580827c990919451
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfbc8767051a9213407eb7773f69a8eaceacd60d2bbf6290f5d09967afaf2e90
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6bd9ec928f5aa63b4ae1d623aea92a59d377e5b6f492ed96b8b4a895afaf2e2
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308d4e8952fee55544cf1f1cec913e24c65d336685e0422f8532306ecb440fda
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f50f899e367d5f5534d1fa738c3ece6a813bcc5ffbb3e3a3a9f7b83ef5c901a
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3519f4d8d44d9139fe4225961fd2041788c8ed035605172bc5a2d90cf8f876
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13da38c37c6a14504e1d4509b6fdd3c72f048e0f8d432b1a8df6dff348d4c1f
3
+ size 131677730
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb05c83ae53b22930353fbb1d804dca1b5a4bda372b609f8fe3eef485351890
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d4358d4c54c236434877ef5ba88e8b04622e3e0dd1ae2d5428ce8072e446c2
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4610c1ae4610651337ffed54f4e6bd49c3d3002d3e2951379db70544271fe8d
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d378fdcefadef7de1a665a1022a32f847428c4d8979b124a228443d9743e0397
3
+ size 131677869
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e49827769f1322dad114032857dba69106606e183c59743cff099956a182f5e
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab874d4847776f11482e8ae684fee0005ec6d2c8203b42e48e4c32fb68e7159
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2694579a0829fb34dd955b0c7fabdcf09d28edbe820ad82e615498174d03eab3
3
+ size 131677805
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8261cd98ad6f06e98c6ea0ca5d3cc935f005b1ff1fe7db3421bb729d31859d
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a54f165983922b3381f6f99c7d057afe014f39f33620059c1d8dc3679ef4dde
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1af342a44d7a933cbfd959eacadb87ae5278e0abfea9427b8840e3436143a697
3
+ size 131677677
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24dbb11294b7b88a9a051b4c677fc0d73ca5883aace40eabde688e77eb6dda09
3
+ size 131677730
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48de1e8dd6d8f12fce2a7bcdcdb20573671fb9762ce47658b5366aecf0c235c2
3
+ size 131677869
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31255d103f01d69a0e15e4a898a282818e6363dec93455da7ecc83d57225ed00
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1e5bf0266ef36aa5f8bc45d0d8cca15114ecb5e16e49bd6e51d1154fe9342c
3
+ size 131677741
2b855b18bc4seed1/global_step52452/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5263f6b74b334d12bb748ed1fc2cd1fcec1449af33be86de848339625c81cb90
3
+ size 131677677