| task,metric,value,err,version | |
| anli_r1,acc,0.33,0.014876872027456732,0 | |
| anli_r2,acc,0.33,0.014876872027456736,0 | |
| anli_r3,acc,0.33166666666666667,0.01359683672948516,0 | |
| arc_challenge,acc,0.3378839590443686,0.013822047922283507,0 | |
| arc_challenge,acc_norm,0.3660409556313993,0.014077223108470144,0 | |
| arc_easy,acc,0.6742424242424242,0.009616642976885964,0 | |
| arc_easy,acc_norm,0.6523569023569024,0.009771868846830909,0 | |
| boolq,acc,0.6428134556574924,0.008380743796951404,1 | |
| cb,acc,0.5178571428571429,0.06737697508644648,1 | |
| cb,f1,0.35968427443837275,,1 | |
| copa,acc,0.79,0.040936018074033256,0 | |
| hellaswag,acc,0.4790878311093408,0.004985415250690917,0 | |
| hellaswag,acc_norm,0.6304521011750648,0.004816958817726088,0 | |
| piqa,acc,0.7404787812840044,0.010227939888173918,0 | |
| piqa,acc_norm,0.7388465723612623,0.010248738649935587,0 | |
| rte,acc,0.5703971119133574,0.02979666882912467,0 | |
| sciq,acc,0.923,0.008434580140240644,0 | |
| sciq,acc_norm,0.901,0.009449248027662746,0 | |
| storycloze_2016,acc,0.7365045430251203,0.010187168219156485,0 | |
| winogrande,acc,0.6235201262825573,0.013616931960667187,0 | |