Commit
·
b9a33e8
1
Parent(s):
f392bed
Add
Browse files- perplexity/evaluation/rankeval/perplexity_4.json +29 -1
- perplexity/evaluation/rankeval/perplexity_4_lm-eval_global_step80108_2023-05-13-09-53-07_4shots_backup.json +29 -1
- perplexity/evaluation/rankeval/perplexity_5.json +34 -1
- perplexity/evaluation/rankeval/perplexity_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json +34 -1
- perplexity25/evaluation/rankeval/perplexity25_4.json +15 -1
- perplexity25/evaluation/rankeval/perplexity25_4_lm-eval_global_step80108_2023-05-13-09-53-07_4shots_backup.json +15 -1
- perplexity25/evaluation/rankeval/perplexity25_5.json +34 -1
- perplexity25/evaluation/rankeval/perplexity25_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json +34 -1
- perplexity50/evaluation/rankeval/perplexity50_5.json +29 -1
- perplexity50/evaluation/rankeval/perplexity50_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json +29 -1
perplexity/evaluation/rankeval/perplexity_4.json
CHANGED
|
@@ -42,6 +42,30 @@
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6425076452599389,
|
| 44 |
"acc_stderr": 0.008382336069484898
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
},
|
| 47 |
"versions": {
|
|
@@ -54,6 +78,10 @@
|
|
| 54 |
"rte": 0,
|
| 55 |
"winogrande": 0,
|
| 56 |
"storycloze_2016": 0,
|
| 57 |
-
"boolq": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
}
|
|
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6425076452599389,
|
| 44 |
"acc_stderr": 0.008382336069484898
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6506734006734006,
|
| 48 |
+
"acc_stderr": 0.009782853449399284,
|
| 49 |
+
"acc_norm": 0.6300505050505051,
|
| 50 |
+
"acc_norm_stderr": 0.009906656266021148
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3165529010238908,
|
| 54 |
+
"acc_stderr": 0.01359243151906808,
|
| 55 |
+
"acc_norm": 0.3378839590443686,
|
| 56 |
+
"acc_norm_stderr": 0.013822047922283509
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.935,
|
| 60 |
+
"acc_stderr": 0.007799733061832011,
|
| 61 |
+
"acc_norm": 0.925,
|
| 62 |
+
"acc_norm_stderr": 0.008333333333333364
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7622415669205659,
|
| 66 |
+
"acc_stderr": 0.009932525779525489,
|
| 67 |
+
"acc_norm": 0.779651795429815,
|
| 68 |
+
"acc_norm_stderr": 0.009670535456853148
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity/evaluation/rankeval/perplexity_4_lm-eval_global_step80108_2023-05-13-09-53-07_4shots_backup.json
CHANGED
|
@@ -42,6 +42,30 @@
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6425076452599389,
|
| 44 |
"acc_stderr": 0.008382336069484898
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
},
|
| 47 |
"versions": {
|
|
@@ -54,6 +78,10 @@
|
|
| 54 |
"rte": 0,
|
| 55 |
"winogrande": 0,
|
| 56 |
"storycloze_2016": 0,
|
| 57 |
-
"boolq": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
}
|
|
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6425076452599389,
|
| 44 |
"acc_stderr": 0.008382336069484898
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6506734006734006,
|
| 48 |
+
"acc_stderr": 0.009782853449399284,
|
| 49 |
+
"acc_norm": 0.6300505050505051,
|
| 50 |
+
"acc_norm_stderr": 0.009906656266021148
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3165529010238908,
|
| 54 |
+
"acc_stderr": 0.01359243151906808,
|
| 55 |
+
"acc_norm": 0.3378839590443686,
|
| 56 |
+
"acc_norm_stderr": 0.013822047922283509
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.935,
|
| 60 |
+
"acc_stderr": 0.007799733061832011,
|
| 61 |
+
"acc_norm": 0.925,
|
| 62 |
+
"acc_norm_stderr": 0.008333333333333364
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7622415669205659,
|
| 66 |
+
"acc_stderr": 0.009932525779525489,
|
| 67 |
+
"acc_norm": 0.779651795429815,
|
| 68 |
+
"acc_norm_stderr": 0.009670535456853148
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity/evaluation/rankeval/perplexity_5.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7477284874398717,
|
| 40 |
"acc_stderr": 0.010043504206387307
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7477284874398717,
|
| 40 |
"acc_stderr": 0.010043504206387307
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.634862385321101,
|
| 44 |
+
"acc_stderr": 0.008420941009417812
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6447811447811448,
|
| 48 |
+
"acc_stderr": 0.009820245899287117,
|
| 49 |
+
"acc_norm": 0.627104377104377,
|
| 50 |
+
"acc_norm_stderr": 0.009922743197129238
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3250853242320819,
|
| 54 |
+
"acc_stderr": 0.013688147309729119,
|
| 55 |
+
"acc_norm": 0.3515358361774744,
|
| 56 |
+
"acc_norm_stderr": 0.013952413699600938
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.935,
|
| 60 |
+
"acc_stderr": 0.007799733061832013,
|
| 61 |
+
"acc_norm": 0.933,
|
| 62 |
+
"acc_norm_stderr": 0.007910345983177549
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.763873775843308,
|
| 66 |
+
"acc_stderr": 0.009908965890558213,
|
| 67 |
+
"acc_norm": 0.7834602829162133,
|
| 68 |
+
"acc_norm_stderr": 0.009609984714384593
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity/evaluation/rankeval/perplexity_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7477284874398717,
|
| 40 |
"acc_stderr": 0.010043504206387307
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7477284874398717,
|
| 40 |
"acc_stderr": 0.010043504206387307
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.634862385321101,
|
| 44 |
+
"acc_stderr": 0.008420941009417812
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6447811447811448,
|
| 48 |
+
"acc_stderr": 0.009820245899287117,
|
| 49 |
+
"acc_norm": 0.627104377104377,
|
| 50 |
+
"acc_norm_stderr": 0.009922743197129238
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3250853242320819,
|
| 54 |
+
"acc_stderr": 0.013688147309729119,
|
| 55 |
+
"acc_norm": 0.3515358361774744,
|
| 56 |
+
"acc_norm_stderr": 0.013952413699600938
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.935,
|
| 60 |
+
"acc_stderr": 0.007799733061832013,
|
| 61 |
+
"acc_norm": 0.933,
|
| 62 |
+
"acc_norm_stderr": 0.007910345983177549
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.763873775843308,
|
| 66 |
+
"acc_stderr": 0.009908965890558213,
|
| 67 |
+
"acc_norm": 0.7834602829162133,
|
| 68 |
+
"acc_norm_stderr": 0.009609984714384593
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity25/evaluation/rankeval/perplexity25_4.json
CHANGED
|
@@ -54,6 +54,18 @@
|
|
| 54 |
"acc_stderr": 0.013715847940719346,
|
| 55 |
"acc_norm": 0.3728668941979522,
|
| 56 |
"acc_norm_stderr": 0.014131176760131163
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
},
|
| 59 |
"versions": {
|
|
@@ -68,6 +80,8 @@
|
|
| 68 |
"storycloze_2016": 0,
|
| 69 |
"boolq": 1,
|
| 70 |
"arc_easy": 0,
|
| 71 |
-
"arc_challenge": 0
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
}
|
|
|
|
| 54 |
"acc_stderr": 0.013715847940719346,
|
| 55 |
"acc_norm": 0.3728668941979522,
|
| 56 |
"acc_norm_stderr": 0.014131176760131163
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.008230354715244054,
|
| 61 |
+
"acc_norm": 0.921,
|
| 62 |
+
"acc_norm_stderr": 0.008534156773333435
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.73449401523395,
|
| 66 |
+
"acc_stderr": 0.010303308653024427,
|
| 67 |
+
"acc_norm": 0.7383025027203483,
|
| 68 |
+
"acc_norm_stderr": 0.010255630772708227
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
"boolq": 1,
|
| 82 |
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity25/evaluation/rankeval/perplexity25_4_lm-eval_global_step80108_2023-05-13-09-53-07_4shots_backup.json
CHANGED
|
@@ -54,6 +54,18 @@
|
|
| 54 |
"acc_stderr": 0.013715847940719346,
|
| 55 |
"acc_norm": 0.3728668941979522,
|
| 56 |
"acc_norm_stderr": 0.014131176760131163
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
},
|
| 59 |
"versions": {
|
|
@@ -68,6 +80,8 @@
|
|
| 68 |
"storycloze_2016": 0,
|
| 69 |
"boolq": 1,
|
| 70 |
"arc_easy": 0,
|
| 71 |
-
"arc_challenge": 0
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
}
|
|
|
|
| 54 |
"acc_stderr": 0.013715847940719346,
|
| 55 |
"acc_norm": 0.3728668941979522,
|
| 56 |
"acc_norm_stderr": 0.014131176760131163
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.008230354715244054,
|
| 61 |
+
"acc_norm": 0.921,
|
| 62 |
+
"acc_norm_stderr": 0.008534156773333435
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.73449401523395,
|
| 66 |
+
"acc_stderr": 0.010303308653024427,
|
| 67 |
+
"acc_norm": 0.7383025027203483,
|
| 68 |
+
"acc_norm_stderr": 0.010255630772708227
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
"boolq": 1,
|
| 82 |
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity25/evaluation/rankeval/perplexity25_5.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7493319080705505,
|
| 40 |
"acc_stderr": 0.010022263975606228
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7493319080705505,
|
| 40 |
"acc_stderr": 0.010022263975606228
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6501529051987768,
|
| 44 |
+
"acc_stderr": 0.008341409251946758
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.680976430976431,
|
| 48 |
+
"acc_stderr": 0.009564133249441074,
|
| 49 |
+
"acc_norm": 0.6658249158249159,
|
| 50 |
+
"acc_norm_stderr": 0.009679106032919058
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.34044368600682595,
|
| 54 |
+
"acc_stderr": 0.013847460518892981,
|
| 55 |
+
"acc_norm": 0.36945392491467577,
|
| 56 |
+
"acc_norm_stderr": 0.0141045783664919
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.00823035471524406,
|
| 61 |
+
"acc_norm": 0.921,
|
| 62 |
+
"acc_norm_stderr": 0.008534156773333442
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7383025027203483,
|
| 66 |
+
"acc_stderr": 0.010255630772708229,
|
| 67 |
+
"acc_norm": 0.735038084874864,
|
| 68 |
+
"acc_norm_stderr": 0.010296557993316037
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity25/evaluation/rankeval/perplexity25_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7493319080705505,
|
| 40 |
"acc_stderr": 0.010022263975606228
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7493319080705505,
|
| 40 |
"acc_stderr": 0.010022263975606228
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6501529051987768,
|
| 44 |
+
"acc_stderr": 0.008341409251946758
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.680976430976431,
|
| 48 |
+
"acc_stderr": 0.009564133249441074,
|
| 49 |
+
"acc_norm": 0.6658249158249159,
|
| 50 |
+
"acc_norm_stderr": 0.009679106032919058
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.34044368600682595,
|
| 54 |
+
"acc_stderr": 0.013847460518892981,
|
| 55 |
+
"acc_norm": 0.36945392491467577,
|
| 56 |
+
"acc_norm_stderr": 0.0141045783664919
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.00823035471524406,
|
| 61 |
+
"acc_norm": 0.921,
|
| 62 |
+
"acc_norm_stderr": 0.008534156773333442
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7383025027203483,
|
| 66 |
+
"acc_stderr": 0.010255630772708229,
|
| 67 |
+
"acc_norm": 0.735038084874864,
|
| 68 |
+
"acc_norm_stderr": 0.010296557993316037
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity50/evaluation/rankeval/perplexity50_5.json
CHANGED
|
@@ -42,6 +42,30 @@
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6204892966360857,
|
| 44 |
"acc_stderr": 0.00848734197575683
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
},
|
| 47 |
"versions": {
|
|
@@ -54,6 +78,10 @@
|
|
| 54 |
"rte": 0,
|
| 55 |
"winogrande": 0,
|
| 56 |
"storycloze_2016": 0,
|
| 57 |
-
"boolq": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
}
|
|
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6204892966360857,
|
| 44 |
"acc_stderr": 0.00848734197575683
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6662457912457912,
|
| 48 |
+
"acc_stderr": 0.009676065683575472,
|
| 49 |
+
"acc_norm": 0.656986531986532,
|
| 50 |
+
"acc_norm_stderr": 0.009740965666489234
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3302047781569966,
|
| 54 |
+
"acc_stderr": 0.013743085603760427,
|
| 55 |
+
"acc_norm": 0.3464163822525597,
|
| 56 |
+
"acc_norm_stderr": 0.013905011180063246
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.922,
|
| 60 |
+
"acc_stderr": 0.008484573530118585,
|
| 61 |
+
"acc_norm": 0.922,
|
| 62 |
+
"acc_norm_stderr": 0.008484573530118585
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7573449401523396,
|
| 66 |
+
"acc_stderr": 0.010002002569708698,
|
| 67 |
+
"acc_norm": 0.7633297062023939,
|
| 68 |
+
"acc_norm_stderr": 0.009916841655042809
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
perplexity50/evaluation/rankeval/perplexity50_5_lm-eval_global_step80108_2023-05-13-09-53-07_5shots_backup.json
CHANGED
|
@@ -42,6 +42,30 @@
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6204892966360857,
|
| 44 |
"acc_stderr": 0.00848734197575683
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
},
|
| 47 |
"versions": {
|
|
@@ -54,6 +78,10 @@
|
|
| 54 |
"rte": 0,
|
| 55 |
"winogrande": 0,
|
| 56 |
"storycloze_2016": 0,
|
| 57 |
-
"boolq": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
}
|
|
|
|
| 42 |
"boolq": {
|
| 43 |
"acc": 0.6204892966360857,
|
| 44 |
"acc_stderr": 0.00848734197575683
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6662457912457912,
|
| 48 |
+
"acc_stderr": 0.009676065683575472,
|
| 49 |
+
"acc_norm": 0.656986531986532,
|
| 50 |
+
"acc_norm_stderr": 0.009740965666489234
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.3302047781569966,
|
| 54 |
+
"acc_stderr": 0.013743085603760427,
|
| 55 |
+
"acc_norm": 0.3464163822525597,
|
| 56 |
+
"acc_norm_stderr": 0.013905011180063246
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.922,
|
| 60 |
+
"acc_stderr": 0.008484573530118585,
|
| 61 |
+
"acc_norm": 0.922,
|
| 62 |
+
"acc_norm_stderr": 0.008484573530118585
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7573449401523396,
|
| 66 |
+
"acc_stderr": 0.010002002569708698,
|
| 67 |
+
"acc_norm": 0.7633297062023939,
|
| 68 |
+
"acc_norm_stderr": 0.009916841655042809
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|