| task,metric,value,err,version | |
| anli_r1,acc,0.335,0.014933117490932573,0 | |
| anli_r2,acc,0.332,0.014899597242811476,0 | |
| anli_r3,acc,0.33666666666666667,0.01364760294240639,0 | |
| arc_challenge,acc,0.28498293515358364,0.013191348179838793,0 | |
| arc_challenge,acc_norm,0.3037542662116041,0.013438909184778757,0 | |
| arc_easy,acc,0.6031144781144782,0.010039236800583209,0 | |
| arc_easy,acc_norm,0.5332491582491582,0.010237073872130745,0 | |
| boolq,acc,0.5923547400611621,0.008594580270731613,1 | |
| cb,acc,0.4107142857142857,0.0663363415035954,1 | |
| cb,f1,0.1940928270042194,,1 | |
| copa,acc,0.79,0.040936018074033256,0 | |
| hellaswag,acc,0.4989046006771559,0.004989769436956927,0 | |
| hellaswag,acc_norm,0.6554471220872337,0.004742510354777903,0 | |
| piqa,acc,0.7616974972796517,0.009940334245876207,0 | |
| piqa,acc_norm,0.7693144722524483,0.009828959550983103,0 | |
| rte,acc,0.5342960288808665,0.030025579819366426,0 | |
| sciq,acc,0.862,0.010912152632504411,0 | |
| sciq,acc_norm,0.782,0.013063179040595296,0 | |
| storycloze_2016,acc,0.7349011223944415,0.01020698782076139,0 | |
| winogrande,acc,0.5895816890292028,0.013825107120035865,0 | |