{"doc_id": 0, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7282954454421997, "incorrect_loss_raw": 1.3860892454783122, "correct_loss_per_char": 0.8641477227210999, "incorrect_loss_per_char": 0.6930446227391561, "correct_loss_per_token": 1.7282954454421997, "incorrect_loss_per_token": 1.3860892454783122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0256636142730713, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": true, "logits_per_token": -1.0256636142730713, "logits_per_char": -0.5128318071365356, "num_chars": 2}, {"sum_logits": -1.1520146131515503, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -1.1520146131515503, "logits_per_char": -0.5760073065757751, "num_chars": 2}, {"sum_logits": -1.980589509010315, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -1.980589509010315, "logits_per_char": -0.9902947545051575, "num_chars": 2}, {"sum_logits": -1.7282954454421997, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -1.7282954454421997, "logits_per_char": -0.8641477227210999, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.404231071472168, "incorrect_loss_raw": 1.4123598337173462, "correct_loss_per_char": 0.702115535736084, "incorrect_loss_per_char": 0.7061799168586731, "correct_loss_per_token": 1.404231071472168, "incorrect_loss_per_token": 1.4123598337173462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.212708830833435, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.212708830833435, "logits_per_char": -0.6063544154167175, "num_chars": 2}, {"sum_logits": -1.322885513305664, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.322885513305664, "logits_per_char": -0.661442756652832, "num_chars": 2}, {"sum_logits": -1.7014851570129395, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.7014851570129395, "logits_per_char": -0.8507425785064697, "num_chars": 2}, {"sum_logits": -1.404231071472168, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.404231071472168, "logits_per_char": -0.702115535736084, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4119210243225098, "incorrect_loss_raw": 1.4130090872446697, "correct_loss_per_char": 0.7059605121612549, "incorrect_loss_per_char": 0.7065045436223348, "correct_loss_per_token": 1.4119210243225098, "incorrect_loss_per_token": 1.4130090872446697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1343965530395508, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1343965530395508, "logits_per_char": -0.5671982765197754, "num_chars": 2}, {"sum_logits": -1.4119210243225098, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4119210243225098, "logits_per_char": -0.7059605121612549, "num_chars": 2}, {"sum_logits": -1.6854010820388794, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6854010820388794, "logits_per_char": -0.8427005410194397, "num_chars": 2}, {"sum_logits": -1.4192296266555786, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4192296266555786, "logits_per_char": -0.7096148133277893, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.033879280090332, "incorrect_loss_raw": 1.5559337536493938, "correct_loss_per_char": 0.516939640045166, "incorrect_loss_per_char": 0.7779668768246969, "correct_loss_per_token": 1.033879280090332, "incorrect_loss_per_token": 1.5559337536493938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.033879280090332, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.033879280090332, "logits_per_char": -0.516939640045166, "num_chars": 2}, {"sum_logits": -1.4683303833007812, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4683303833007812, "logits_per_char": -0.7341651916503906, "num_chars": 2}, {"sum_logits": -1.6682446002960205, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6682446002960205, "logits_per_char": -0.8341223001480103, "num_chars": 2}, {"sum_logits": -1.5312262773513794, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.5312262773513794, "logits_per_char": -0.7656131386756897, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9365841150283813, "incorrect_loss_raw": 1.3054476181666057, "correct_loss_per_char": 0.9682920575141907, "incorrect_loss_per_char": 0.6527238090833029, "correct_loss_per_token": 1.9365841150283813, "incorrect_loss_per_token": 1.3054476181666057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0252090692520142, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": true, "logits_per_token": -1.0252090692520142, "logits_per_char": -0.5126045346260071, "num_chars": 2}, {"sum_logits": -1.185020923614502, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.185020923614502, "logits_per_char": -0.592510461807251, "num_chars": 2}, {"sum_logits": -1.9365841150283813, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.9365841150283813, "logits_per_char": -0.9682920575141907, "num_chars": 2}, {"sum_logits": -1.7061128616333008, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.7061128616333008, "logits_per_char": -0.8530564308166504, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.324195146560669, "incorrect_loss_raw": 1.4517327547073364, "correct_loss_per_char": 0.6620975732803345, "incorrect_loss_per_char": 0.7258663773536682, "correct_loss_per_token": 1.324195146560669, "incorrect_loss_per_token": 1.4517327547073364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1262465715408325, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -1.1262465715408325, "logits_per_char": -0.5631232857704163, "num_chars": 2}, {"sum_logits": -1.324195146560669, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.324195146560669, "logits_per_char": -0.6620975732803345, "num_chars": 2}, {"sum_logits": -1.7135149240493774, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7135149240493774, "logits_per_char": -0.8567574620246887, "num_chars": 2}, {"sum_logits": -1.5154367685317993, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.5154367685317993, "logits_per_char": -0.7577183842658997, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.643228530883789, "incorrect_loss_raw": 1.3355173269907634, "correct_loss_per_char": 0.8216142654418945, "incorrect_loss_per_char": 0.6677586634953817, "correct_loss_per_token": 1.643228530883789, "incorrect_loss_per_token": 1.3355173269907634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1360944509506226, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.1360944509506226, "logits_per_char": -0.5680472254753113, "num_chars": 2}, {"sum_logits": -1.3849797248840332, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.3849797248840332, "logits_per_char": -0.6924898624420166, "num_chars": 2}, {"sum_logits": -1.643228530883789, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.643228530883789, "logits_per_char": -0.8216142654418945, "num_chars": 2}, {"sum_logits": -1.4854778051376343, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4854778051376343, "logits_per_char": -0.7427389025688171, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7483030557632446, "incorrect_loss_raw": 1.31424085299174, "correct_loss_per_char": 0.8741515278816223, "incorrect_loss_per_char": 0.65712042649587, "correct_loss_per_token": 1.7483030557632446, "incorrect_loss_per_token": 1.31424085299174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1152284145355225, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.1152284145355225, "logits_per_char": -0.5576142072677612, "num_chars": 2}, {"sum_logits": -1.2959638833999634, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.2959638833999634, "logits_per_char": -0.6479819416999817, "num_chars": 2}, {"sum_logits": -1.7483030557632446, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.7483030557632446, "logits_per_char": -0.8741515278816223, "num_chars": 2}, {"sum_logits": -1.5315302610397339, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5315302610397339, "logits_per_char": -0.7657651305198669, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.160776972770691, "incorrect_loss_raw": 1.5408924420674641, "correct_loss_per_char": 0.5803884863853455, "incorrect_loss_per_char": 0.7704462210337321, "correct_loss_per_token": 1.160776972770691, "incorrect_loss_per_token": 1.5408924420674641, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1086920499801636, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.1086920499801636, "logits_per_char": -0.5543460249900818, "num_chars": 2}, {"sum_logits": -1.160776972770691, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.160776972770691, "logits_per_char": -0.5803884863853455, "num_chars": 2}, {"sum_logits": -1.869226336479187, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.869226336479187, "logits_per_char": -0.9346131682395935, "num_chars": 2}, {"sum_logits": -1.644758939743042, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.644758939743042, "logits_per_char": -0.822379469871521, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6480509042739868, "incorrect_loss_raw": 1.3855095704396565, "correct_loss_per_char": 0.8240254521369934, "incorrect_loss_per_char": 0.6927547852198283, "correct_loss_per_token": 1.6480509042739868, "incorrect_loss_per_token": 1.3855095704396565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0081276893615723, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.0081276893615723, "logits_per_char": -0.5040638446807861, "num_chars": 2}, {"sum_logits": -1.310416579246521, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.310416579246521, "logits_per_char": -0.6552082896232605, "num_chars": 2}, {"sum_logits": -1.8379844427108765, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8379844427108765, "logits_per_char": -0.9189922213554382, "num_chars": 2}, {"sum_logits": -1.6480509042739868, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.6480509042739868, "logits_per_char": -0.8240254521369934, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6121057271957397, "incorrect_loss_raw": 1.3538583517074585, "correct_loss_per_char": 0.8060528635978699, "incorrect_loss_per_char": 0.6769291758537292, "correct_loss_per_token": 1.6121057271957397, "incorrect_loss_per_token": 1.3538583517074585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2684069871902466, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.2684069871902466, "logits_per_char": -0.6342034935951233, "num_chars": 2}, {"sum_logits": -1.1467536687850952, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": true, "logits_per_token": -1.1467536687850952, "logits_per_char": -0.5733768343925476, "num_chars": 2}, {"sum_logits": -1.6464143991470337, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.6464143991470337, "logits_per_char": -0.8232071995735168, "num_chars": 2}, {"sum_logits": -1.6121057271957397, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.6121057271957397, "logits_per_char": -0.8060528635978699, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3355002403259277, "incorrect_loss_raw": 1.4319820006688435, "correct_loss_per_char": 0.6677501201629639, "incorrect_loss_per_char": 0.7159910003344218, "correct_loss_per_token": 1.3355002403259277, "incorrect_loss_per_token": 1.4319820006688435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2092269659042358, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -1.2092269659042358, "logits_per_char": -0.6046134829521179, "num_chars": 2}, {"sum_logits": -1.3355002403259277, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.3355002403259277, "logits_per_char": -0.6677501201629639, "num_chars": 2}, {"sum_logits": -1.6905213594436646, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.6905213594436646, "logits_per_char": -0.8452606797218323, "num_chars": 2}, {"sum_logits": -1.3961976766586304, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.3961976766586304, "logits_per_char": -0.6980988383293152, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7065742015838623, "incorrect_loss_raw": 1.3126240173975627, "correct_loss_per_char": 0.8532871007919312, "incorrect_loss_per_char": 0.6563120086987814, "correct_loss_per_token": 1.7065742015838623, "incorrect_loss_per_token": 1.3126240173975627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3459137678146362, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.3459137678146362, "logits_per_char": -0.6729568839073181, "num_chars": 2}, {"sum_logits": -1.1846439838409424, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -1.1846439838409424, "logits_per_char": -0.5923219919204712, "num_chars": 2}, {"sum_logits": -1.7065742015838623, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.7065742015838623, "logits_per_char": -0.8532871007919312, "num_chars": 2}, {"sum_logits": -1.4073143005371094, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.4073143005371094, "logits_per_char": -0.7036571502685547, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2040446996688843, "incorrect_loss_raw": 1.5149635473887126, "correct_loss_per_char": 0.6020223498344421, "incorrect_loss_per_char": 0.7574817736943563, "correct_loss_per_token": 1.2040446996688843, "incorrect_loss_per_token": 1.5149635473887126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1152102947235107, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.1152102947235107, "logits_per_char": -0.5576051473617554, "num_chars": 2}, {"sum_logits": -1.2040446996688843, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.2040446996688843, "logits_per_char": -0.6020223498344421, "num_chars": 2}, {"sum_logits": -1.8451024293899536, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.8451024293899536, "logits_per_char": -0.9225512146949768, "num_chars": 2}, {"sum_logits": -1.5845779180526733, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.5845779180526733, "logits_per_char": -0.7922889590263367, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0477761030197144, "incorrect_loss_raw": 1.5807998180389404, "correct_loss_per_char": 0.5238880515098572, "incorrect_loss_per_char": 0.7903999090194702, "correct_loss_per_token": 1.0477761030197144, "incorrect_loss_per_token": 1.5807998180389404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0477761030197144, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.0477761030197144, "logits_per_char": -0.5238880515098572, "num_chars": 2}, {"sum_logits": -1.206789255142212, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.206789255142212, "logits_per_char": -0.603394627571106, "num_chars": 2}, {"sum_logits": -1.8243958950042725, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.8243958950042725, "logits_per_char": -0.9121979475021362, "num_chars": 2}, {"sum_logits": -1.711214303970337, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.711214303970337, "logits_per_char": -0.8556071519851685, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9815880060195923, "incorrect_loss_raw": 1.6359120607376099, "correct_loss_per_char": 0.49079400300979614, "incorrect_loss_per_char": 0.8179560303688049, "correct_loss_per_token": 0.9815880060195923, "incorrect_loss_per_token": 1.6359120607376099, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9815880060195923, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.9815880060195923, "logits_per_char": -0.49079400300979614, "num_chars": 2}, {"sum_logits": -1.2160385847091675, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2160385847091675, "logits_per_char": -0.6080192923545837, "num_chars": 2}, {"sum_logits": -2.013739585876465, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -2.013739585876465, "logits_per_char": -1.0068697929382324, "num_chars": 2}, {"sum_logits": -1.6779580116271973, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.6779580116271973, "logits_per_char": -0.8389790058135986, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7479307651519775, "incorrect_loss_raw": 1.3099524974822998, "correct_loss_per_char": 0.8739653825759888, "incorrect_loss_per_char": 0.6549762487411499, "correct_loss_per_token": 1.7479307651519775, "incorrect_loss_per_token": 1.3099524974822998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1010522842407227, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -1.1010522842407227, "logits_per_char": -0.5505261421203613, "num_chars": 2}, {"sum_logits": -1.476304531097412, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.476304531097412, "logits_per_char": -0.738152265548706, "num_chars": 2}, {"sum_logits": -1.7479307651519775, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.7479307651519775, "logits_per_char": -0.8739653825759888, "num_chars": 2}, {"sum_logits": -1.3525006771087646, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.3525006771087646, "logits_per_char": -0.6762503385543823, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0460984706878662, "incorrect_loss_raw": 1.55022394657135, "correct_loss_per_char": 0.5230492353439331, "incorrect_loss_per_char": 0.775111973285675, "correct_loss_per_token": 1.0460984706878662, "incorrect_loss_per_token": 1.55022394657135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0460984706878662, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -1.0460984706878662, "logits_per_char": -0.5230492353439331, "num_chars": 2}, {"sum_logits": -1.3984849452972412, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.3984849452972412, "logits_per_char": -0.6992424726486206, "num_chars": 2}, {"sum_logits": -1.7287352085113525, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.7287352085113525, "logits_per_char": -0.8643676042556763, "num_chars": 2}, {"sum_logits": -1.5234516859054565, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.5234516859054565, "logits_per_char": -0.7617258429527283, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.02422297000885, "incorrect_loss_raw": 1.559889554977417, "correct_loss_per_char": 0.512111485004425, "incorrect_loss_per_char": 0.7799447774887085, "correct_loss_per_token": 1.02422297000885, "incorrect_loss_per_token": 1.559889554977417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.02422297000885, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -1.02422297000885, "logits_per_char": -0.512111485004425, "num_chars": 2}, {"sum_logits": -1.4551318883895874, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.4551318883895874, "logits_per_char": -0.7275659441947937, "num_chars": 2}, {"sum_logits": -1.6313201189041138, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.6313201189041138, "logits_per_char": -0.8156600594520569, "num_chars": 2}, {"sum_logits": -1.5932166576385498, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5932166576385498, "logits_per_char": -0.7966083288192749, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5375926494598389, "incorrect_loss_raw": 1.3891691366831462, "correct_loss_per_char": 0.7687963247299194, "incorrect_loss_per_char": 0.6945845683415731, "correct_loss_per_token": 1.5375926494598389, "incorrect_loss_per_token": 1.3891691366831462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1231738328933716, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.1231738328933716, "logits_per_char": -0.5615869164466858, "num_chars": 2}, {"sum_logits": -1.2808243036270142, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2808243036270142, "logits_per_char": -0.6404121518135071, "num_chars": 2}, {"sum_logits": -1.7635092735290527, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.7635092735290527, "logits_per_char": -0.8817546367645264, "num_chars": 2}, {"sum_logits": -1.5375926494598389, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.5375926494598389, "logits_per_char": -0.7687963247299194, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.480743408203125, "incorrect_loss_raw": 1.3890462319056194, "correct_loss_per_char": 0.7403717041015625, "incorrect_loss_per_char": 0.6945231159528097, "correct_loss_per_token": 1.480743408203125, "incorrect_loss_per_token": 1.3890462319056194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1831622123718262, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.1831622123718262, "logits_per_char": -0.5915811061859131, "num_chars": 2}, {"sum_logits": -1.2966182231903076, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.2966182231903076, "logits_per_char": -0.6483091115951538, "num_chars": 2}, {"sum_logits": -1.6873582601547241, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6873582601547241, "logits_per_char": -0.8436791300773621, "num_chars": 2}, {"sum_logits": -1.480743408203125, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.480743408203125, "logits_per_char": -0.7403717041015625, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7845356464385986, "incorrect_loss_raw": 1.3922353188196819, "correct_loss_per_char": 0.8922678232192993, "incorrect_loss_per_char": 0.6961176594098409, "correct_loss_per_token": 1.7845356464385986, "incorrect_loss_per_token": 1.3922353188196819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9334299564361572, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -0.9334299564361572, "logits_per_char": -0.4667149782180786, "num_chars": 2}, {"sum_logits": -1.221173644065857, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.221173644065857, "logits_per_char": -0.6105868220329285, "num_chars": 2}, {"sum_logits": -2.0221023559570312, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -2.0221023559570312, "logits_per_char": -1.0110511779785156, "num_chars": 2}, {"sum_logits": -1.7845356464385986, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.7845356464385986, "logits_per_char": -0.8922678232192993, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2368625402450562, "incorrect_loss_raw": 1.5330392916997273, "correct_loss_per_char": 0.6184312701225281, "incorrect_loss_per_char": 0.7665196458498637, "correct_loss_per_token": 1.2368625402450562, "incorrect_loss_per_token": 1.5330392916997273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0922465324401855, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": true, "logits_per_token": -1.0922465324401855, "logits_per_char": -0.5461232662200928, "num_chars": 2}, {"sum_logits": -1.2368625402450562, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.2368625402450562, "logits_per_char": -0.6184312701225281, "num_chars": 2}, {"sum_logits": -2.080979347229004, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -2.080979347229004, "logits_per_char": -1.040489673614502, "num_chars": 2}, {"sum_logits": -1.4258919954299927, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.4258919954299927, "logits_per_char": -0.7129459977149963, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1664373874664307, "incorrect_loss_raw": 1.5628722111384075, "correct_loss_per_char": 0.5832186937332153, "incorrect_loss_per_char": 0.7814361055692037, "correct_loss_per_token": 1.1664373874664307, "incorrect_loss_per_token": 1.5628722111384075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.03585946559906, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.03585946559906, "logits_per_char": -0.51792973279953, "num_chars": 2}, {"sum_logits": -1.1664373874664307, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.1664373874664307, "logits_per_char": -0.5832186937332153, "num_chars": 2}, {"sum_logits": -1.941157579421997, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.941157579421997, "logits_per_char": -0.9705787897109985, "num_chars": 2}, {"sum_logits": -1.711599588394165, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.711599588394165, "logits_per_char": -0.8557997941970825, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2022343873977661, "incorrect_loss_raw": 1.4825437863667805, "correct_loss_per_char": 0.6011171936988831, "incorrect_loss_per_char": 0.7412718931833903, "correct_loss_per_token": 1.2022343873977661, "incorrect_loss_per_token": 1.4825437863667805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2022343873977661, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -1.2022343873977661, "logits_per_char": -0.6011171936988831, "num_chars": 2}, {"sum_logits": -1.2464131116867065, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.2464131116867065, "logits_per_char": -0.6232065558433533, "num_chars": 2}, {"sum_logits": -1.6369807720184326, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6369807720184326, "logits_per_char": -0.8184903860092163, "num_chars": 2}, {"sum_logits": -1.5642374753952026, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.5642374753952026, "logits_per_char": -0.7821187376976013, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0978608131408691, "incorrect_loss_raw": 1.5515402952829997, "correct_loss_per_char": 0.5489304065704346, "incorrect_loss_per_char": 0.7757701476414999, "correct_loss_per_token": 1.0978608131408691, "incorrect_loss_per_token": 1.5515402952829997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0978608131408691, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.0978608131408691, "logits_per_char": -0.5489304065704346, "num_chars": 2}, {"sum_logits": -1.237317681312561, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.237317681312561, "logits_per_char": -0.6186588406562805, "num_chars": 2}, {"sum_logits": -1.8698886632919312, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.8698886632919312, "logits_per_char": -0.9349443316459656, "num_chars": 2}, {"sum_logits": -1.5474145412445068, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.5474145412445068, "logits_per_char": -0.7737072706222534, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.730104923248291, "incorrect_loss_raw": 1.3619338075319927, "correct_loss_per_char": 0.8650524616241455, "incorrect_loss_per_char": 0.6809669037659963, "correct_loss_per_token": 1.730104923248291, "incorrect_loss_per_token": 1.3619338075319927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9891694188117981, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.9891694188117981, "logits_per_char": -0.49458470940589905, "num_chars": 2}, {"sum_logits": -1.2842750549316406, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.2842750549316406, "logits_per_char": -0.6421375274658203, "num_chars": 2}, {"sum_logits": -1.812356948852539, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.812356948852539, "logits_per_char": -0.9061784744262695, "num_chars": 2}, {"sum_logits": -1.730104923248291, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.730104923248291, "logits_per_char": -0.8650524616241455, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.513414740562439, "incorrect_loss_raw": 1.4059280157089233, "correct_loss_per_char": 0.7567073702812195, "incorrect_loss_per_char": 0.7029640078544617, "correct_loss_per_token": 1.513414740562439, "incorrect_loss_per_token": 1.4059280157089233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0578622817993164, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -1.0578622817993164, "logits_per_char": -0.5289311408996582, "num_chars": 2}, {"sum_logits": -1.3377765417099, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.3377765417099, "logits_per_char": -0.66888827085495, "num_chars": 2}, {"sum_logits": -1.8221452236175537, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.8221452236175537, "logits_per_char": -0.9110726118087769, "num_chars": 2}, {"sum_logits": -1.513414740562439, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.513414740562439, "logits_per_char": -0.7567073702812195, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8839291334152222, "incorrect_loss_raw": 1.2922697861989338, "correct_loss_per_char": 0.9419645667076111, "incorrect_loss_per_char": 0.6461348930994669, "correct_loss_per_token": 1.8839291334152222, "incorrect_loss_per_token": 1.2922697861989338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0921456813812256, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.0921456813812256, "logits_per_char": -0.5460728406906128, "num_chars": 2}, {"sum_logits": -1.2127676010131836, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.2127676010131836, "logits_per_char": -0.6063838005065918, "num_chars": 2}, {"sum_logits": -1.8839291334152222, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.8839291334152222, "logits_per_char": -0.9419645667076111, "num_chars": 2}, {"sum_logits": -1.5718960762023926, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.5718960762023926, "logits_per_char": -0.7859480381011963, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6913106441497803, "incorrect_loss_raw": 1.3404996792475383, "correct_loss_per_char": 0.8456553220748901, "incorrect_loss_per_char": 0.6702498396237692, "correct_loss_per_token": 1.6913106441497803, "incorrect_loss_per_token": 1.3404996792475383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1130847930908203, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -1.1130847930908203, "logits_per_char": -0.5565423965454102, "num_chars": 2}, {"sum_logits": -1.2228270769119263, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.2228270769119263, "logits_per_char": -0.6114135384559631, "num_chars": 2}, {"sum_logits": -1.6855871677398682, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6855871677398682, "logits_per_char": -0.8427935838699341, "num_chars": 2}, {"sum_logits": -1.6913106441497803, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6913106441497803, "logits_per_char": -0.8456553220748901, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8990321159362793, "incorrect_loss_raw": 1.3106900850931804, "correct_loss_per_char": 0.9495160579681396, "incorrect_loss_per_char": 0.6553450425465902, "correct_loss_per_token": 1.8990321159362793, "incorrect_loss_per_token": 1.3106900850931804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0344130992889404, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -1.0344130992889404, "logits_per_char": -0.5172065496444702, "num_chars": 2}, {"sum_logits": -1.1876096725463867, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.1876096725463867, "logits_per_char": -0.5938048362731934, "num_chars": 2}, {"sum_logits": -1.8990321159362793, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.8990321159362793, "logits_per_char": -0.9495160579681396, "num_chars": 2}, {"sum_logits": -1.7100474834442139, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.7100474834442139, "logits_per_char": -0.8550237417221069, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.871811032295227, "incorrect_loss_raw": 1.2908766269683838, "correct_loss_per_char": 0.9359055161476135, "incorrect_loss_per_char": 0.6454383134841919, "correct_loss_per_token": 1.871811032295227, "incorrect_loss_per_token": 1.2908766269683838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1494348049163818, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.1494348049163818, "logits_per_char": -0.5747174024581909, "num_chars": 2}, {"sum_logits": -1.1772958040237427, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.1772958040237427, "logits_per_char": -0.5886479020118713, "num_chars": 2}, {"sum_logits": -1.871811032295227, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.871811032295227, "logits_per_char": -0.9359055161476135, "num_chars": 2}, {"sum_logits": -1.5458992719650269, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.5458992719650269, "logits_per_char": -0.7729496359825134, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.333766222000122, "incorrect_loss_raw": 1.4314530690511067, "correct_loss_per_char": 0.666883111000061, "incorrect_loss_per_char": 0.7157265345255533, "correct_loss_per_token": 1.333766222000122, "incorrect_loss_per_token": 1.4314530690511067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2100616693496704, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.2100616693496704, "logits_per_char": -0.6050308346748352, "num_chars": 2}, {"sum_logits": -1.333766222000122, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.333766222000122, "logits_per_char": -0.666883111000061, "num_chars": 2}, {"sum_logits": -1.6278014183044434, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6278014183044434, "logits_per_char": -0.8139007091522217, "num_chars": 2}, {"sum_logits": -1.4564961194992065, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4564961194992065, "logits_per_char": -0.7282480597496033, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1979248523712158, "incorrect_loss_raw": 1.5377737283706665, "correct_loss_per_char": 0.5989624261856079, "incorrect_loss_per_char": 0.7688868641853333, "correct_loss_per_token": 1.1979248523712158, "incorrect_loss_per_token": 1.5377737283706665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0866748094558716, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.0866748094558716, "logits_per_char": -0.5433374047279358, "num_chars": 2}, {"sum_logits": -1.1979248523712158, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.1979248523712158, "logits_per_char": -0.5989624261856079, "num_chars": 2}, {"sum_logits": -2.009517192840576, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -2.009517192840576, "logits_per_char": -1.004758596420288, "num_chars": 2}, {"sum_logits": -1.5171291828155518, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.5171291828155518, "logits_per_char": -0.7585645914077759, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1824123859405518, "incorrect_loss_raw": 1.525646487871806, "correct_loss_per_char": 0.5912061929702759, "incorrect_loss_per_char": 0.762823243935903, "correct_loss_per_token": 1.1824123859405518, "incorrect_loss_per_token": 1.525646487871806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1824123859405518, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.1824123859405518, "logits_per_char": -0.5912061929702759, "num_chars": 2}, {"sum_logits": -1.1520575284957886, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -1.1520575284957886, "logits_per_char": -0.5760287642478943, "num_chars": 2}, {"sum_logits": -1.9254573583602905, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.9254573583602905, "logits_per_char": -0.9627286791801453, "num_chars": 2}, {"sum_logits": -1.4994245767593384, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.4994245767593384, "logits_per_char": -0.7497122883796692, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8267600536346436, "incorrect_loss_raw": 1.3036614259084065, "correct_loss_per_char": 0.9133800268173218, "incorrect_loss_per_char": 0.6518307129542033, "correct_loss_per_token": 1.8267600536346436, "incorrect_loss_per_token": 1.3036614259084065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0324410200119019, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.0324410200119019, "logits_per_char": -0.5162205100059509, "num_chars": 2}, {"sum_logits": -1.3716124296188354, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3716124296188354, "logits_per_char": -0.6858062148094177, "num_chars": 2}, {"sum_logits": -1.8267600536346436, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.8267600536346436, "logits_per_char": -0.9133800268173218, "num_chars": 2}, {"sum_logits": -1.5069308280944824, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5069308280944824, "logits_per_char": -0.7534654140472412, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7855231761932373, "incorrect_loss_raw": 1.3051966031392415, "correct_loss_per_char": 0.8927615880966187, "incorrect_loss_per_char": 0.6525983015696207, "correct_loss_per_token": 1.7855231761932373, "incorrect_loss_per_token": 1.3051966031392415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.16083562374115, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.16083562374115, "logits_per_char": -0.580417811870575, "num_chars": 2}, {"sum_logits": -1.2380013465881348, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2380013465881348, "logits_per_char": -0.6190006732940674, "num_chars": 2}, {"sum_logits": -1.7855231761932373, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.7855231761932373, "logits_per_char": -0.8927615880966187, "num_chars": 2}, {"sum_logits": -1.51675283908844, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.51675283908844, "logits_per_char": -0.75837641954422, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2768741846084595, "incorrect_loss_raw": 1.4722824096679688, "correct_loss_per_char": 0.6384370923042297, "incorrect_loss_per_char": 0.7361412048339844, "correct_loss_per_token": 1.2768741846084595, "incorrect_loss_per_token": 1.4722824096679688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1337640285491943, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.1337640285491943, "logits_per_char": -0.5668820142745972, "num_chars": 2}, {"sum_logits": -1.2768741846084595, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.2768741846084595, "logits_per_char": -0.6384370923042297, "num_chars": 2}, {"sum_logits": -1.7983050346374512, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.7983050346374512, "logits_per_char": -0.8991525173187256, "num_chars": 2}, {"sum_logits": -1.4847781658172607, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.4847781658172607, "logits_per_char": -0.7423890829086304, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.791150689125061, "incorrect_loss_raw": 1.9049795866012573, "correct_loss_per_char": 0.3955753445625305, "incorrect_loss_per_char": 0.9524897933006287, "correct_loss_per_token": 0.791150689125061, "incorrect_loss_per_token": 1.9049795866012573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.791150689125061, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": true, "logits_per_token": -0.791150689125061, "logits_per_char": -0.3955753445625305, "num_chars": 2}, {"sum_logits": -1.12920343875885, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": false, "logits_per_token": -1.12920343875885, "logits_per_char": -0.564601719379425, "num_chars": 2}, {"sum_logits": -2.523530960083008, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": false, "logits_per_token": -2.523530960083008, "logits_per_char": -1.261765480041504, "num_chars": 2}, {"sum_logits": -2.062204360961914, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": false, "logits_per_token": -2.062204360961914, "logits_per_char": -1.031102180480957, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1918506622314453, "incorrect_loss_raw": 1.5325794219970703, "correct_loss_per_char": 0.5959253311157227, "incorrect_loss_per_char": 0.7662897109985352, "correct_loss_per_token": 1.1918506622314453, "incorrect_loss_per_token": 1.5325794219970703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0821146965026855, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.0821146965026855, "logits_per_char": -0.5410573482513428, "num_chars": 2}, {"sum_logits": -1.1918506622314453, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.1918506622314453, "logits_per_char": -0.5959253311157227, "num_chars": 2}, {"sum_logits": -1.9076671600341797, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.9076671600341797, "logits_per_char": -0.9538335800170898, "num_chars": 2}, {"sum_logits": -1.6079564094543457, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.6079564094543457, "logits_per_char": -0.8039782047271729, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7970390319824219, "incorrect_loss_raw": 1.3417038122812908, "correct_loss_per_char": 0.8985195159912109, "incorrect_loss_per_char": 0.6708519061406454, "correct_loss_per_token": 1.7970390319824219, "incorrect_loss_per_token": 1.3417038122812908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9720010757446289, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -0.9720010757446289, "logits_per_char": -0.48600053787231445, "num_chars": 2}, {"sum_logits": -1.3148174285888672, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.3148174285888672, "logits_per_char": -0.6574087142944336, "num_chars": 2}, {"sum_logits": -1.7970390319824219, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.7970390319824219, "logits_per_char": -0.8985195159912109, "num_chars": 2}, {"sum_logits": -1.738292932510376, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.738292932510376, "logits_per_char": -0.869146466255188, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.110337495803833, "incorrect_loss_raw": 1.565008004506429, "correct_loss_per_char": 0.5551687479019165, "incorrect_loss_per_char": 0.7825040022532145, "correct_loss_per_token": 1.110337495803833, "incorrect_loss_per_token": 1.565008004506429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.110337495803833, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.110337495803833, "logits_per_char": -0.5551687479019165, "num_chars": 2}, {"sum_logits": -1.126065969467163, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.126065969467163, "logits_per_char": -0.5630329847335815, "num_chars": 2}, {"sum_logits": -1.9213156700134277, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.9213156700134277, "logits_per_char": -0.9606578350067139, "num_chars": 2}, {"sum_logits": -1.6476423740386963, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.6476423740386963, "logits_per_char": -0.8238211870193481, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1606173515319824, "incorrect_loss_raw": 1.5470263957977295, "correct_loss_per_char": 0.5803086757659912, "incorrect_loss_per_char": 0.7735131978988647, "correct_loss_per_token": 1.1606173515319824, "incorrect_loss_per_token": 1.5470263957977295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0955168008804321, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.0955168008804321, "logits_per_char": -0.5477584004402161, "num_chars": 2}, {"sum_logits": -1.1606173515319824, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.1606173515319824, "logits_per_char": -0.5803086757659912, "num_chars": 2}, {"sum_logits": -1.9316548109054565, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.9316548109054565, "logits_per_char": -0.9658274054527283, "num_chars": 2}, {"sum_logits": -1.6139075756072998, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.6139075756072998, "logits_per_char": -0.8069537878036499, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.170867919921875, "incorrect_loss_raw": 1.573411266009013, "correct_loss_per_char": 0.5854339599609375, "incorrect_loss_per_char": 0.7867056330045065, "correct_loss_per_token": 1.170867919921875, "incorrect_loss_per_token": 1.573411266009013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0095049142837524, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -1.0095049142837524, "logits_per_char": -0.5047524571418762, "num_chars": 2}, {"sum_logits": -1.170867919921875, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.170867919921875, "logits_per_char": -0.5854339599609375, "num_chars": 2}, {"sum_logits": -2.019099235534668, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.019099235534668, "logits_per_char": -1.009549617767334, "num_chars": 2}, {"sum_logits": -1.6916296482086182, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.6916296482086182, "logits_per_char": -0.8458148241043091, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5436922311782837, "incorrect_loss_raw": 1.362858812014262, "correct_loss_per_char": 0.7718461155891418, "incorrect_loss_per_char": 0.681429406007131, "correct_loss_per_token": 1.5436922311782837, "incorrect_loss_per_token": 1.362858812014262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.284386396408081, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.284386396408081, "logits_per_char": -0.6421931982040405, "num_chars": 2}, {"sum_logits": -1.2615437507629395, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.2615437507629395, "logits_per_char": -0.6307718753814697, "num_chars": 2}, {"sum_logits": -1.5426462888717651, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.5426462888717651, "logits_per_char": -0.7713231444358826, "num_chars": 2}, {"sum_logits": -1.5436922311782837, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.5436922311782837, "logits_per_char": -0.7718461155891418, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.215574860572815, "incorrect_loss_raw": 1.4781140486399333, "correct_loss_per_char": 0.6077874302864075, "incorrect_loss_per_char": 0.7390570243199667, "correct_loss_per_token": 1.215574860572815, "incorrect_loss_per_token": 1.4781140486399333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.215574860572815, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.215574860572815, "logits_per_char": -0.6077874302864075, "num_chars": 2}, {"sum_logits": -1.2708563804626465, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.2708563804626465, "logits_per_char": -0.6354281902313232, "num_chars": 2}, {"sum_logits": -1.6695992946624756, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.6695992946624756, "logits_per_char": -0.8347996473312378, "num_chars": 2}, {"sum_logits": -1.4938864707946777, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.4938864707946777, "logits_per_char": -0.7469432353973389, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1317058801651, "incorrect_loss_raw": 1.5181595087051392, "correct_loss_per_char": 0.56585294008255, "incorrect_loss_per_char": 0.7590797543525696, "correct_loss_per_token": 1.1317058801651, "incorrect_loss_per_token": 1.5181595087051392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1317058801651, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -1.1317058801651, "logits_per_char": -0.56585294008255, "num_chars": 2}, {"sum_logits": -1.2746639251708984, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.2746639251708984, "logits_per_char": -0.6373319625854492, "num_chars": 2}, {"sum_logits": -1.7230250835418701, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.7230250835418701, "logits_per_char": -0.8615125417709351, "num_chars": 2}, {"sum_logits": -1.556789517402649, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.556789517402649, "logits_per_char": -0.7783947587013245, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2481555938720703, "incorrect_loss_raw": 1.459774096806844, "correct_loss_per_char": 0.6240777969360352, "incorrect_loss_per_char": 0.729887048403422, "correct_loss_per_token": 1.2481555938720703, "incorrect_loss_per_token": 1.459774096806844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2565937042236328, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.2565937042236328, "logits_per_char": -0.6282968521118164, "num_chars": 2}, {"sum_logits": -1.2481555938720703, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -1.2481555938720703, "logits_per_char": -0.6240777969360352, "num_chars": 2}, {"sum_logits": -1.6015583276748657, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6015583276748657, "logits_per_char": -0.8007791638374329, "num_chars": 2}, {"sum_logits": -1.5211702585220337, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.5211702585220337, "logits_per_char": -0.7605851292610168, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2115211486816406, "incorrect_loss_raw": 1.4952380259831746, "correct_loss_per_char": 0.6057605743408203, "incorrect_loss_per_char": 0.7476190129915873, "correct_loss_per_token": 1.2115211486816406, "incorrect_loss_per_token": 1.4952380259831746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.196789026260376, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.196789026260376, "logits_per_char": -0.598394513130188, "num_chars": 2}, {"sum_logits": -1.2115211486816406, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.2115211486816406, "logits_per_char": -0.6057605743408203, "num_chars": 2}, {"sum_logits": -1.7664161920547485, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.7664161920547485, "logits_per_char": -0.8832080960273743, "num_chars": 2}, {"sum_logits": -1.5225088596343994, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.5225088596343994, "logits_per_char": -0.7612544298171997, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.418847918510437, "incorrect_loss_raw": 1.4032684564590454, "correct_loss_per_char": 0.7094239592552185, "incorrect_loss_per_char": 0.7016342282295227, "correct_loss_per_token": 1.418847918510437, "incorrect_loss_per_token": 1.4032684564590454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.158318281173706, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.158318281173706, "logits_per_char": -0.579159140586853, "num_chars": 2}, {"sum_logits": -1.418847918510437, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.418847918510437, "logits_per_char": -0.7094239592552185, "num_chars": 2}, {"sum_logits": -1.6224669218063354, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6224669218063354, "logits_per_char": -0.8112334609031677, "num_chars": 2}, {"sum_logits": -1.4290201663970947, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4290201663970947, "logits_per_char": -0.7145100831985474, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8230687379837036, "incorrect_loss_raw": 1.3323566913604736, "correct_loss_per_char": 0.9115343689918518, "incorrect_loss_per_char": 0.6661783456802368, "correct_loss_per_token": 1.8230687379837036, "incorrect_loss_per_token": 1.3323566913604736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0535396337509155, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -1.0535396337509155, "logits_per_char": -0.5267698168754578, "num_chars": 2}, {"sum_logits": -1.1717522144317627, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.1717522144317627, "logits_per_char": -0.5858761072158813, "num_chars": 2}, {"sum_logits": -1.7717782258987427, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.7717782258987427, "logits_per_char": -0.8858891129493713, "num_chars": 2}, {"sum_logits": -1.8230687379837036, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.8230687379837036, "logits_per_char": -0.9115343689918518, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0469906330108643, "incorrect_loss_raw": 1.2633225917816162, "correct_loss_per_char": 1.0234953165054321, "incorrect_loss_per_char": 0.6316612958908081, "correct_loss_per_token": 2.0469906330108643, "incorrect_loss_per_token": 1.2633225917816162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1273339986801147, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.1273339986801147, "logits_per_char": -0.5636669993400574, "num_chars": 2}, {"sum_logits": -1.1248246431350708, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -1.1248246431350708, "logits_per_char": -0.5624123215675354, "num_chars": 2}, {"sum_logits": -2.0469906330108643, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -2.0469906330108643, "logits_per_char": -1.0234953165054321, "num_chars": 2}, {"sum_logits": -1.537809133529663, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.537809133529663, "logits_per_char": -0.7689045667648315, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1590076684951782, "incorrect_loss_raw": 1.4904706875483196, "correct_loss_per_char": 0.5795038342475891, "incorrect_loss_per_char": 0.7452353437741598, "correct_loss_per_token": 1.1590076684951782, "incorrect_loss_per_token": 1.4904706875483196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1590076684951782, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.1590076684951782, "logits_per_char": -0.5795038342475891, "num_chars": 2}, {"sum_logits": -1.4062973260879517, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4062973260879517, "logits_per_char": -0.7031486630439758, "num_chars": 2}, {"sum_logits": -1.5935187339782715, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.5935187339782715, "logits_per_char": -0.7967593669891357, "num_chars": 2}, {"sum_logits": -1.4715960025787354, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4715960025787354, "logits_per_char": -0.7357980012893677, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7880582809448242, "incorrect_loss_raw": 1.3196876247723897, "correct_loss_per_char": 0.8940291404724121, "incorrect_loss_per_char": 0.6598438123861948, "correct_loss_per_token": 1.7880582809448242, "incorrect_loss_per_token": 1.3196876247723897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9931064248085022, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.9931064248085022, "logits_per_char": -0.4965532124042511, "num_chars": 2}, {"sum_logits": -1.4225037097930908, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.4225037097930908, "logits_per_char": -0.7112518548965454, "num_chars": 2}, {"sum_logits": -1.7880582809448242, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7880582809448242, "logits_per_char": -0.8940291404724121, "num_chars": 2}, {"sum_logits": -1.5434527397155762, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.5434527397155762, "logits_per_char": -0.7717263698577881, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4286653995513916, "incorrect_loss_raw": 1.402055025100708, "correct_loss_per_char": 0.7143326997756958, "incorrect_loss_per_char": 0.701027512550354, "correct_loss_per_token": 1.4286653995513916, "incorrect_loss_per_token": 1.402055025100708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.181719422340393, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -1.181719422340393, "logits_per_char": -0.5908597111701965, "num_chars": 2}, {"sum_logits": -1.3570665121078491, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.3570665121078491, "logits_per_char": -0.6785332560539246, "num_chars": 2}, {"sum_logits": -1.6673791408538818, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6673791408538818, "logits_per_char": -0.8336895704269409, "num_chars": 2}, {"sum_logits": -1.4286653995513916, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.4286653995513916, "logits_per_char": -0.7143326997756958, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.09451425075531, "incorrect_loss_raw": 1.5475683212280273, "correct_loss_per_char": 0.547257125377655, "incorrect_loss_per_char": 0.7737841606140137, "correct_loss_per_token": 1.09451425075531, "incorrect_loss_per_token": 1.5475683212280273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.09451425075531, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.09451425075531, "logits_per_char": -0.547257125377655, "num_chars": 2}, {"sum_logits": -1.2411301136016846, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2411301136016846, "logits_per_char": -0.6205650568008423, "num_chars": 2}, {"sum_logits": -1.8062865734100342, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.8062865734100342, "logits_per_char": -0.9031432867050171, "num_chars": 2}, {"sum_logits": -1.5952882766723633, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.5952882766723633, "logits_per_char": -0.7976441383361816, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6535871028900146, "incorrect_loss_raw": 1.3850058317184448, "correct_loss_per_char": 0.8267935514450073, "incorrect_loss_per_char": 0.6925029158592224, "correct_loss_per_token": 1.6535871028900146, "incorrect_loss_per_token": 1.3850058317184448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0136576890945435, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -1.0136576890945435, "logits_per_char": -0.5068288445472717, "num_chars": 2}, {"sum_logits": -1.2449849843978882, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.2449849843978882, "logits_per_char": -0.6224924921989441, "num_chars": 2}, {"sum_logits": -1.8963748216629028, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.8963748216629028, "logits_per_char": -0.9481874108314514, "num_chars": 2}, {"sum_logits": -1.6535871028900146, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.6535871028900146, "logits_per_char": -0.8267935514450073, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4933521747589111, "incorrect_loss_raw": 1.394209384918213, "correct_loss_per_char": 0.7466760873794556, "incorrect_loss_per_char": 0.6971046924591064, "correct_loss_per_token": 1.4933521747589111, "incorrect_loss_per_token": 1.394209384918213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.127922534942627, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -1.127922534942627, "logits_per_char": -0.5639612674713135, "num_chars": 2}, {"sum_logits": -1.3361676931381226, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.3361676931381226, "logits_per_char": -0.6680838465690613, "num_chars": 2}, {"sum_logits": -1.7185379266738892, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.7185379266738892, "logits_per_char": -0.8592689633369446, "num_chars": 2}, {"sum_logits": -1.4933521747589111, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.4933521747589111, "logits_per_char": -0.7466760873794556, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1949843168258667, "incorrect_loss_raw": 1.4760717153549194, "correct_loss_per_char": 0.5974921584129333, "incorrect_loss_per_char": 0.7380358576774597, "correct_loss_per_token": 1.1949843168258667, "incorrect_loss_per_token": 1.4760717153549194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1949843168258667, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.1949843168258667, "logits_per_char": -0.5974921584129333, "num_chars": 2}, {"sum_logits": -1.333314299583435, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.333314299583435, "logits_per_char": -0.6666571497917175, "num_chars": 2}, {"sum_logits": -1.588538408279419, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.588538408279419, "logits_per_char": -0.7942692041397095, "num_chars": 2}, {"sum_logits": -1.5063624382019043, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.5063624382019043, "logits_per_char": -0.7531812191009521, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9860315322875977, "incorrect_loss_raw": 1.6214627822240193, "correct_loss_per_char": 0.49301576614379883, "incorrect_loss_per_char": 0.8107313911120096, "correct_loss_per_token": 0.9860315322875977, "incorrect_loss_per_token": 1.6214627822240193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9860315322875977, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -0.9860315322875977, "logits_per_char": -0.49301576614379883, "num_chars": 2}, {"sum_logits": -1.2360862493515015, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.2360862493515015, "logits_per_char": -0.6180431246757507, "num_chars": 2}, {"sum_logits": -1.7884091138839722, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.7884091138839722, "logits_per_char": -0.8942045569419861, "num_chars": 2}, {"sum_logits": -1.8398929834365845, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.8398929834365845, "logits_per_char": -0.9199464917182922, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9138500690460205, "incorrect_loss_raw": 1.2848008076349895, "correct_loss_per_char": 0.9569250345230103, "incorrect_loss_per_char": 0.6424004038174947, "correct_loss_per_token": 1.9138500690460205, "incorrect_loss_per_token": 1.2848008076349895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.165732502937317, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.165732502937317, "logits_per_char": -0.5828662514686584, "num_chars": 2}, {"sum_logits": -1.133070945739746, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -1.133070945739746, "logits_per_char": -0.566535472869873, "num_chars": 2}, {"sum_logits": -1.9138500690460205, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.9138500690460205, "logits_per_char": -0.9569250345230103, "num_chars": 2}, {"sum_logits": -1.5555989742279053, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.5555989742279053, "logits_per_char": -0.7777994871139526, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1341005563735962, "incorrect_loss_raw": 1.4984007279078166, "correct_loss_per_char": 0.5670502781867981, "incorrect_loss_per_char": 0.7492003639539083, "correct_loss_per_token": 1.1341005563735962, "incorrect_loss_per_token": 1.4984007279078166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1341005563735962, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1341005563735962, "logits_per_char": -0.5670502781867981, "num_chars": 2}, {"sum_logits": -1.433304786682129, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.433304786682129, "logits_per_char": -0.7166523933410645, "num_chars": 2}, {"sum_logits": -1.579825758934021, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.579825758934021, "logits_per_char": -0.7899128794670105, "num_chars": 2}, {"sum_logits": -1.4820716381072998, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4820716381072998, "logits_per_char": -0.7410358190536499, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4705884456634521, "incorrect_loss_raw": 1.4131584167480469, "correct_loss_per_char": 0.7352942228317261, "incorrect_loss_per_char": 0.7065792083740234, "correct_loss_per_token": 1.4705884456634521, "incorrect_loss_per_token": 1.4131584167480469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0249440670013428, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -1.0249440670013428, "logits_per_char": -0.5124720335006714, "num_chars": 2}, {"sum_logits": -1.4705884456634521, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.4705884456634521, "logits_per_char": -0.7352942228317261, "num_chars": 2}, {"sum_logits": -1.6931458711624146, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.6931458711624146, "logits_per_char": -0.8465729355812073, "num_chars": 2}, {"sum_logits": -1.5213853120803833, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.5213853120803833, "logits_per_char": -0.7606926560401917, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8398454189300537, "incorrect_loss_raw": 1.299729347229004, "correct_loss_per_char": 0.9199227094650269, "incorrect_loss_per_char": 0.649864673614502, "correct_loss_per_token": 1.8398454189300537, "incorrect_loss_per_token": 1.299729347229004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1677945852279663, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.1677945852279663, "logits_per_char": -0.5838972926139832, "num_chars": 2}, {"sum_logits": -1.140964150428772, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -1.140964150428772, "logits_per_char": -0.570482075214386, "num_chars": 2}, {"sum_logits": -1.8398454189300537, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.8398454189300537, "logits_per_char": -0.9199227094650269, "num_chars": 2}, {"sum_logits": -1.5904293060302734, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.5904293060302734, "logits_per_char": -0.7952146530151367, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1942168474197388, "incorrect_loss_raw": 1.4833736817042034, "correct_loss_per_char": 0.5971084237098694, "incorrect_loss_per_char": 0.7416868408521017, "correct_loss_per_token": 1.1942168474197388, "incorrect_loss_per_token": 1.4833736817042034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1942168474197388, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -1.1942168474197388, "logits_per_char": -0.5971084237098694, "num_chars": 2}, {"sum_logits": -1.3106523752212524, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.3106523752212524, "logits_per_char": -0.6553261876106262, "num_chars": 2}, {"sum_logits": -1.7150226831436157, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.7150226831436157, "logits_per_char": -0.8575113415718079, "num_chars": 2}, {"sum_logits": -1.4244459867477417, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.4244459867477417, "logits_per_char": -0.7122229933738708, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0663937330245972, "incorrect_loss_raw": 1.569452206293742, "correct_loss_per_char": 0.5331968665122986, "incorrect_loss_per_char": 0.784726103146871, "correct_loss_per_token": 1.0663937330245972, "incorrect_loss_per_token": 1.569452206293742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0663937330245972, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.0663937330245972, "logits_per_char": -0.5331968665122986, "num_chars": 2}, {"sum_logits": -1.255453109741211, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.255453109741211, "logits_per_char": -0.6277265548706055, "num_chars": 2}, {"sum_logits": -1.8970593214035034, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.8970593214035034, "logits_per_char": -0.9485296607017517, "num_chars": 2}, {"sum_logits": -1.5558441877365112, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.5558441877365112, "logits_per_char": -0.7779220938682556, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7035633325576782, "incorrect_loss_raw": 1.3271132707595825, "correct_loss_per_char": 0.8517816662788391, "incorrect_loss_per_char": 0.6635566353797913, "correct_loss_per_token": 1.7035633325576782, "incorrect_loss_per_token": 1.3271132707595825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0987753868103027, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -1.0987753868103027, "logits_per_char": -0.5493876934051514, "num_chars": 2}, {"sum_logits": -1.3237155675888062, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3237155675888062, "logits_per_char": -0.6618577837944031, "num_chars": 2}, {"sum_logits": -1.7035633325576782, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.7035633325576782, "logits_per_char": -0.8517816662788391, "num_chars": 2}, {"sum_logits": -1.5588488578796387, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5588488578796387, "logits_per_char": -0.7794244289398193, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.147808313369751, "incorrect_loss_raw": 1.5635066827138264, "correct_loss_per_char": 0.5739041566848755, "incorrect_loss_per_char": 0.7817533413569132, "correct_loss_per_token": 1.147808313369751, "incorrect_loss_per_token": 1.5635066827138264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0686876773834229, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -1.0686876773834229, "logits_per_char": -0.5343438386917114, "num_chars": 2}, {"sum_logits": -1.147808313369751, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.147808313369751, "logits_per_char": -0.5739041566848755, "num_chars": 2}, {"sum_logits": -1.921836018562317, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.921836018562317, "logits_per_char": -0.9609180092811584, "num_chars": 2}, {"sum_logits": -1.6999963521957397, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.6999963521957397, "logits_per_char": -0.8499981760978699, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9006348848342896, "incorrect_loss_raw": 1.2819795608520508, "correct_loss_per_char": 0.9503174424171448, "incorrect_loss_per_char": 0.6409897804260254, "correct_loss_per_token": 1.9006348848342896, "incorrect_loss_per_token": 1.2819795608520508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.094384789466858, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.094384789466858, "logits_per_char": -0.547192394733429, "num_chars": 2}, {"sum_logits": -1.295108437538147, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.295108437538147, "logits_per_char": -0.6475542187690735, "num_chars": 2}, {"sum_logits": -1.9006348848342896, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.9006348848342896, "logits_per_char": -0.9503174424171448, "num_chars": 2}, {"sum_logits": -1.4564454555511475, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.4564454555511475, "logits_per_char": -0.7282227277755737, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7629413604736328, "incorrect_loss_raw": 1.299140731493632, "correct_loss_per_char": 0.8814706802368164, "incorrect_loss_per_char": 0.649570365746816, "correct_loss_per_token": 1.7629413604736328, "incorrect_loss_per_token": 1.299140731493632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1935776472091675, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.1935776472091675, "logits_per_char": -0.5967888236045837, "num_chars": 2}, {"sum_logits": -1.398451805114746, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.398451805114746, "logits_per_char": -0.699225902557373, "num_chars": 2}, {"sum_logits": -1.7629413604736328, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.7629413604736328, "logits_per_char": -0.8814706802368164, "num_chars": 2}, {"sum_logits": -1.3053927421569824, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.3053927421569824, "logits_per_char": -0.6526963710784912, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8296277523040771, "incorrect_loss_raw": 1.3435189326604207, "correct_loss_per_char": 0.9148138761520386, "incorrect_loss_per_char": 0.6717594663302103, "correct_loss_per_token": 1.8296277523040771, "incorrect_loss_per_token": 1.3435189326604207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9927117824554443, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -0.9927117824554443, "logits_per_char": -0.49635589122772217, "num_chars": 2}, {"sum_logits": -1.2135978937149048, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.2135978937149048, "logits_per_char": -0.6067989468574524, "num_chars": 2}, {"sum_logits": -1.8296277523040771, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.8296277523040771, "logits_per_char": -0.9148138761520386, "num_chars": 2}, {"sum_logits": -1.824247121810913, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.824247121810913, "logits_per_char": -0.9121235609054565, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0130277872085571, "incorrect_loss_raw": 1.6013602415720622, "correct_loss_per_char": 0.5065138936042786, "incorrect_loss_per_char": 0.8006801207860311, "correct_loss_per_token": 1.0130277872085571, "incorrect_loss_per_token": 1.6013602415720622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0130277872085571, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.0130277872085571, "logits_per_char": -0.5065138936042786, "num_chars": 2}, {"sum_logits": -1.2692749500274658, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2692749500274658, "logits_per_char": -0.6346374750137329, "num_chars": 2}, {"sum_logits": -1.9086722135543823, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.9086722135543823, "logits_per_char": -0.9543361067771912, "num_chars": 2}, {"sum_logits": -1.6261335611343384, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.6261335611343384, "logits_per_char": -0.8130667805671692, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.12859046459198, "incorrect_loss_raw": 1.5103192329406738, "correct_loss_per_char": 0.56429523229599, "incorrect_loss_per_char": 0.7551596164703369, "correct_loss_per_token": 1.12859046459198, "incorrect_loss_per_token": 1.5103192329406738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.12859046459198, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -1.12859046459198, "logits_per_char": -0.56429523229599, "num_chars": 2}, {"sum_logits": -1.3901288509368896, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.3901288509368896, "logits_per_char": -0.6950644254684448, "num_chars": 2}, {"sum_logits": -1.674473762512207, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.674473762512207, "logits_per_char": -0.8372368812561035, "num_chars": 2}, {"sum_logits": -1.4663550853729248, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.4663550853729248, "logits_per_char": -0.7331775426864624, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419670820236206, "incorrect_loss_raw": 1.418778379758199, "correct_loss_per_char": 0.709835410118103, "incorrect_loss_per_char": 0.7093891898790995, "correct_loss_per_token": 1.419670820236206, "incorrect_loss_per_token": 1.418778379758199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109588623046875, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.109588623046875, "logits_per_char": -0.5547943115234375, "num_chars": 2}, {"sum_logits": -1.419670820236206, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.419670820236206, "logits_per_char": -0.709835410118103, "num_chars": 2}, {"sum_logits": -1.7514338493347168, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7514338493347168, "logits_per_char": -0.8757169246673584, "num_chars": 2}, {"sum_logits": -1.3953126668930054, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.3953126668930054, "logits_per_char": -0.6976563334465027, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6825544834136963, "incorrect_loss_raw": 1.3450839122136433, "correct_loss_per_char": 0.8412772417068481, "incorrect_loss_per_char": 0.6725419561068217, "correct_loss_per_token": 1.6825544834136963, "incorrect_loss_per_token": 1.3450839122136433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1025997400283813, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.1025997400283813, "logits_per_char": -0.5512998700141907, "num_chars": 2}, {"sum_logits": -1.26847243309021, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.26847243309021, "logits_per_char": -0.634236216545105, "num_chars": 2}, {"sum_logits": -1.6825544834136963, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.6825544834136963, "logits_per_char": -0.8412772417068481, "num_chars": 2}, {"sum_logits": -1.6641795635223389, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.6641795635223389, "logits_per_char": -0.8320897817611694, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0930532217025757, "incorrect_loss_raw": 1.5331697066624959, "correct_loss_per_char": 0.5465266108512878, "incorrect_loss_per_char": 0.7665848533312479, "correct_loss_per_token": 1.0930532217025757, "incorrect_loss_per_token": 1.5331697066624959, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0930532217025757, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -1.0930532217025757, "logits_per_char": -0.5465266108512878, "num_chars": 2}, {"sum_logits": -1.3377221822738647, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.3377221822738647, "logits_per_char": -0.6688610911369324, "num_chars": 2}, {"sum_logits": -1.7507085800170898, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.7507085800170898, "logits_per_char": -0.8753542900085449, "num_chars": 2}, {"sum_logits": -1.5110783576965332, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5110783576965332, "logits_per_char": -0.7555391788482666, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1659646034240723, "incorrect_loss_raw": 1.5152550141016643, "correct_loss_per_char": 0.5829823017120361, "incorrect_loss_per_char": 0.7576275070508321, "correct_loss_per_token": 1.1659646034240723, "incorrect_loss_per_token": 1.5152550141016643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1659646034240723, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.1659646034240723, "logits_per_char": -0.5829823017120361, "num_chars": 2}, {"sum_logits": -1.1893833875656128, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.1893833875656128, "logits_per_char": -0.5946916937828064, "num_chars": 2}, {"sum_logits": -1.8213919401168823, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8213919401168823, "logits_per_char": -0.9106959700584412, "num_chars": 2}, {"sum_logits": -1.5349897146224976, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.5349897146224976, "logits_per_char": -0.7674948573112488, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2010674476623535, "incorrect_loss_raw": 1.2588157653808594, "correct_loss_per_char": 1.1005337238311768, "incorrect_loss_per_char": 0.6294078826904297, "correct_loss_per_token": 2.2010674476623535, "incorrect_loss_per_token": 1.2588157653808594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0057225227355957, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.0057225227355957, "logits_per_char": -0.5028612613677979, "num_chars": 2}, {"sum_logits": -1.156618356704712, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.156618356704712, "logits_per_char": -0.578309178352356, "num_chars": 2}, {"sum_logits": -2.2010674476623535, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -2.2010674476623535, "logits_per_char": -1.1005337238311768, "num_chars": 2}, {"sum_logits": -1.6141064167022705, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.6141064167022705, "logits_per_char": -0.8070532083511353, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2253786325454712, "incorrect_loss_raw": 1.4743104378382366, "correct_loss_per_char": 0.6126893162727356, "incorrect_loss_per_char": 0.7371552189191183, "correct_loss_per_token": 1.2253786325454712, "incorrect_loss_per_token": 1.4743104378382366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2253786325454712, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -1.2253786325454712, "logits_per_char": -0.6126893162727356, "num_chars": 2}, {"sum_logits": -1.2426960468292236, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.2426960468292236, "logits_per_char": -0.6213480234146118, "num_chars": 2}, {"sum_logits": -1.7089346647262573, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.7089346647262573, "logits_per_char": -0.8544673323631287, "num_chars": 2}, {"sum_logits": -1.4713006019592285, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.4713006019592285, "logits_per_char": -0.7356503009796143, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9493225812911987, "incorrect_loss_raw": 1.6789484818776448, "correct_loss_per_char": 0.47466129064559937, "incorrect_loss_per_char": 0.8394742409388224, "correct_loss_per_token": 0.9493225812911987, "incorrect_loss_per_token": 1.6789484818776448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9493225812911987, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.9493225812911987, "logits_per_char": -0.47466129064559937, "num_chars": 2}, {"sum_logits": -1.307448148727417, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.307448148727417, "logits_per_char": -0.6537240743637085, "num_chars": 2}, {"sum_logits": -2.2251882553100586, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -2.2251882553100586, "logits_per_char": -1.1125941276550293, "num_chars": 2}, {"sum_logits": -1.504209041595459, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.504209041595459, "logits_per_char": -0.7521045207977295, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.509248971939087, "incorrect_loss_raw": 1.4420822461446126, "correct_loss_per_char": 0.7546244859695435, "incorrect_loss_per_char": 0.7210411230723063, "correct_loss_per_token": 1.509248971939087, "incorrect_loss_per_token": 1.4420822461446126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9667909145355225, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -0.9667909145355225, "logits_per_char": -0.48339545726776123, "num_chars": 2}, {"sum_logits": -1.420914649963379, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.420914649963379, "logits_per_char": -0.7104573249816895, "num_chars": 2}, {"sum_logits": -1.9385411739349365, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.9385411739349365, "logits_per_char": -0.9692705869674683, "num_chars": 2}, {"sum_logits": -1.509248971939087, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.509248971939087, "logits_per_char": -0.7546244859695435, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1030163764953613, "incorrect_loss_raw": 1.566017707188924, "correct_loss_per_char": 0.5515081882476807, "incorrect_loss_per_char": 0.783008853594462, "correct_loss_per_token": 1.1030163764953613, "incorrect_loss_per_token": 1.566017707188924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1030163764953613, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.1030163764953613, "logits_per_char": -0.5515081882476807, "num_chars": 2}, {"sum_logits": -1.199043869972229, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.199043869972229, "logits_per_char": -0.5995219349861145, "num_chars": 2}, {"sum_logits": -2.010125160217285, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -2.010125160217285, "logits_per_char": -1.0050625801086426, "num_chars": 2}, {"sum_logits": -1.4888840913772583, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.4888840913772583, "logits_per_char": -0.7444420456886292, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0358014106750488, "incorrect_loss_raw": 1.600951910018921, "correct_loss_per_char": 0.5179007053375244, "incorrect_loss_per_char": 0.8004759550094604, "correct_loss_per_token": 1.0358014106750488, "incorrect_loss_per_token": 1.600951910018921, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0358014106750488, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0358014106750488, "logits_per_char": -0.5179007053375244, "num_chars": 2}, {"sum_logits": -1.2017444372177124, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2017444372177124, "logits_per_char": -0.6008722186088562, "num_chars": 2}, {"sum_logits": -1.9322184324264526, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.9322184324264526, "logits_per_char": -0.9661092162132263, "num_chars": 2}, {"sum_logits": -1.6688928604125977, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.6688928604125977, "logits_per_char": -0.8344464302062988, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4569370746612549, "incorrect_loss_raw": 1.4271396001180012, "correct_loss_per_char": 0.7284685373306274, "incorrect_loss_per_char": 0.7135698000590006, "correct_loss_per_token": 1.4569370746612549, "incorrect_loss_per_token": 1.4271396001180012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1307135820388794, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.1307135820388794, "logits_per_char": -0.5653567910194397, "num_chars": 2}, {"sum_logits": -1.2552772760391235, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.2552772760391235, "logits_per_char": -0.6276386380195618, "num_chars": 2}, {"sum_logits": -1.895427942276001, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.895427942276001, "logits_per_char": -0.9477139711380005, "num_chars": 2}, {"sum_logits": -1.4569370746612549, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.4569370746612549, "logits_per_char": -0.7284685373306274, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.120326042175293, "incorrect_loss_raw": 1.5806864102681477, "correct_loss_per_char": 0.5601630210876465, "incorrect_loss_per_char": 0.7903432051340739, "correct_loss_per_token": 1.120326042175293, "incorrect_loss_per_token": 1.5806864102681477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1047515869140625, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.1047515869140625, "logits_per_char": -0.5523757934570312, "num_chars": 2}, {"sum_logits": -1.120326042175293, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.120326042175293, "logits_per_char": -0.5601630210876465, "num_chars": 2}, {"sum_logits": -2.0695226192474365, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -2.0695226192474365, "logits_per_char": -1.0347613096237183, "num_chars": 2}, {"sum_logits": -1.5677850246429443, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.5677850246429443, "logits_per_char": -0.7838925123214722, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1864655017852783, "incorrect_loss_raw": 1.5107427438100178, "correct_loss_per_char": 0.5932327508926392, "incorrect_loss_per_char": 0.7553713719050089, "correct_loss_per_token": 1.1864655017852783, "incorrect_loss_per_token": 1.5107427438100178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1864655017852783, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -1.1864655017852783, "logits_per_char": -0.5932327508926392, "num_chars": 2}, {"sum_logits": -1.2109462022781372, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.2109462022781372, "logits_per_char": -0.6054731011390686, "num_chars": 2}, {"sum_logits": -1.8861638307571411, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.8861638307571411, "logits_per_char": -0.9430819153785706, "num_chars": 2}, {"sum_logits": -1.4351181983947754, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.4351181983947754, "logits_per_char": -0.7175590991973877, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0840309858322144, "incorrect_loss_raw": 1.5464723507563274, "correct_loss_per_char": 0.5420154929161072, "incorrect_loss_per_char": 0.7732361753781637, "correct_loss_per_token": 1.0840309858322144, "incorrect_loss_per_token": 1.5464723507563274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0840309858322144, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.0840309858322144, "logits_per_char": -0.5420154929161072, "num_chars": 2}, {"sum_logits": -1.2815316915512085, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.2815316915512085, "logits_per_char": -0.6407658457756042, "num_chars": 2}, {"sum_logits": -1.7660493850708008, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.7660493850708008, "logits_per_char": -0.8830246925354004, "num_chars": 2}, {"sum_logits": -1.5918359756469727, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.5918359756469727, "logits_per_char": -0.7959179878234863, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5198464393615723, "incorrect_loss_raw": 1.4070040782292683, "correct_loss_per_char": 0.7599232196807861, "incorrect_loss_per_char": 0.7035020391146342, "correct_loss_per_token": 1.5198464393615723, "incorrect_loss_per_token": 1.4070040782292683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9963706731796265, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.9963706731796265, "logits_per_char": -0.49818533658981323, "num_chars": 2}, {"sum_logits": -1.4619545936584473, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4619545936584473, "logits_per_char": -0.7309772968292236, "num_chars": 2}, {"sum_logits": -1.7626869678497314, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.7626869678497314, "logits_per_char": -0.8813434839248657, "num_chars": 2}, {"sum_logits": -1.5198464393615723, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.5198464393615723, "logits_per_char": -0.7599232196807861, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.800484299659729, "incorrect_loss_raw": 1.2990527153015137, "correct_loss_per_char": 0.9002421498298645, "incorrect_loss_per_char": 0.6495263576507568, "correct_loss_per_token": 1.800484299659729, "incorrect_loss_per_token": 1.2990527153015137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2223825454711914, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.2223825454711914, "logits_per_char": -0.6111912727355957, "num_chars": 2}, {"sum_logits": -1.1679961681365967, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.1679961681365967, "logits_per_char": -0.5839980840682983, "num_chars": 2}, {"sum_logits": -1.800484299659729, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.800484299659729, "logits_per_char": -0.9002421498298645, "num_chars": 2}, {"sum_logits": -1.506779432296753, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.506779432296753, "logits_per_char": -0.7533897161483765, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": null, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.311792254447937, "incorrect_loss_raw": 1.47205917040507, "correct_loss_per_char": 0.6558961272239685, "incorrect_loss_per_char": 0.736029585202535, "correct_loss_per_token": 1.311792254447937, "incorrect_loss_per_token": 1.47205917040507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2600380182266235, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.2600380182266235, "logits_per_char": -0.6300190091133118, "num_chars": 2}, {"sum_logits": -1.311792254447937, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.311792254447937, "logits_per_char": -0.6558961272239685, "num_chars": 2}, {"sum_logits": -1.9346150159835815, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.9346150159835815, "logits_per_char": -0.9673075079917908, "num_chars": 2}, {"sum_logits": -1.2215244770050049, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.2215244770050049, "logits_per_char": -0.6107622385025024, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428781509399414, "incorrect_loss_raw": 1.4211341540018718, "correct_loss_per_char": 0.714390754699707, "incorrect_loss_per_char": 0.7105670770009359, "correct_loss_per_token": 1.428781509399414, "incorrect_loss_per_token": 1.4211341540018718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.062990665435791, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -1.062990665435791, "logits_per_char": -0.5314953327178955, "num_chars": 2}, {"sum_logits": -1.428781509399414, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.428781509399414, "logits_per_char": -0.714390754699707, "num_chars": 2}, {"sum_logits": -1.7524851560592651, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.7524851560592651, "logits_per_char": -0.8762425780296326, "num_chars": 2}, {"sum_logits": -1.447926640510559, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.447926640510559, "logits_per_char": -0.7239633202552795, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.151448369026184, "incorrect_loss_raw": 1.5949302911758423, "correct_loss_per_char": 0.575724184513092, "incorrect_loss_per_char": 0.7974651455879211, "correct_loss_per_token": 1.151448369026184, "incorrect_loss_per_token": 1.5949302911758423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9916298389434814, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -0.9916298389434814, "logits_per_char": -0.4958149194717407, "num_chars": 2}, {"sum_logits": -1.151448369026184, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.151448369026184, "logits_per_char": -0.575724184513092, "num_chars": 2}, {"sum_logits": -1.986084222793579, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.986084222793579, "logits_per_char": -0.9930421113967896, "num_chars": 2}, {"sum_logits": -1.8070768117904663, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.8070768117904663, "logits_per_char": -0.9035384058952332, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0301856994628906, "incorrect_loss_raw": 1.6147512594858806, "correct_loss_per_char": 0.5150928497314453, "incorrect_loss_per_char": 0.8073756297429403, "correct_loss_per_token": 1.0301856994628906, "incorrect_loss_per_token": 1.6147512594858806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0301856994628906, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0301856994628906, "logits_per_char": -0.5150928497314453, "num_chars": 2}, {"sum_logits": -1.1615512371063232, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1615512371063232, "logits_per_char": -0.5807756185531616, "num_chars": 2}, {"sum_logits": -1.9760446548461914, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.9760446548461914, "logits_per_char": -0.9880223274230957, "num_chars": 2}, {"sum_logits": -1.706657886505127, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.706657886505127, "logits_per_char": -0.8533289432525635, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8781168460845947, "incorrect_loss_raw": 1.297402064005534, "correct_loss_per_char": 0.9390584230422974, "incorrect_loss_per_char": 0.648701032002767, "correct_loss_per_token": 1.8781168460845947, "incorrect_loss_per_token": 1.297402064005534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0700331926345825, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -1.0700331926345825, "logits_per_char": -0.5350165963172913, "num_chars": 2}, {"sum_logits": -1.2350307703018188, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.2350307703018188, "logits_per_char": -0.6175153851509094, "num_chars": 2}, {"sum_logits": -1.8781168460845947, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.8781168460845947, "logits_per_char": -0.9390584230422974, "num_chars": 2}, {"sum_logits": -1.5871422290802002, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.5871422290802002, "logits_per_char": -0.7935711145401001, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2595162391662598, "incorrect_loss_raw": 1.4768304030100505, "correct_loss_per_char": 0.6297581195831299, "incorrect_loss_per_char": 0.7384152015050253, "correct_loss_per_token": 1.2595162391662598, "incorrect_loss_per_token": 1.4768304030100505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1533620357513428, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.1533620357513428, "logits_per_char": -0.5766810178756714, "num_chars": 2}, {"sum_logits": -1.2595162391662598, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.2595162391662598, "logits_per_char": -0.6297581195831299, "num_chars": 2}, {"sum_logits": -1.7758866548538208, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7758866548538208, "logits_per_char": -0.8879433274269104, "num_chars": 2}, {"sum_logits": -1.5012425184249878, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.5012425184249878, "logits_per_char": -0.7506212592124939, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.757178783416748, "incorrect_loss_raw": 1.400303324063619, "correct_loss_per_char": 0.878589391708374, "incorrect_loss_per_char": 0.7001516620318095, "correct_loss_per_token": 1.757178783416748, "incorrect_loss_per_token": 1.400303324063619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9314167499542236, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -0.9314167499542236, "logits_per_char": -0.4657083749771118, "num_chars": 2}, {"sum_logits": -1.243695855140686, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.243695855140686, "logits_per_char": -0.621847927570343, "num_chars": 2}, {"sum_logits": -2.0257973670959473, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -2.0257973670959473, "logits_per_char": -1.0128986835479736, "num_chars": 2}, {"sum_logits": -1.757178783416748, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.757178783416748, "logits_per_char": -0.878589391708374, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.723687767982483, "incorrect_loss_raw": 1.3117151657740276, "correct_loss_per_char": 0.8618438839912415, "incorrect_loss_per_char": 0.6558575828870138, "correct_loss_per_token": 1.723687767982483, "incorrect_loss_per_token": 1.3117151657740276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2154698371887207, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -1.2154698371887207, "logits_per_char": -0.6077349185943604, "num_chars": 2}, {"sum_logits": -1.2596784830093384, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.2596784830093384, "logits_per_char": -0.6298392415046692, "num_chars": 2}, {"sum_logits": -1.723687767982483, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.723687767982483, "logits_per_char": -0.8618438839912415, "num_chars": 2}, {"sum_logits": -1.4599971771240234, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.4599971771240234, "logits_per_char": -0.7299985885620117, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3336248397827148, "incorrect_loss_raw": 1.4413117170333862, "correct_loss_per_char": 0.6668124198913574, "incorrect_loss_per_char": 0.7206558585166931, "correct_loss_per_token": 1.3336248397827148, "incorrect_loss_per_token": 1.4413117170333862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1616398096084595, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -1.1616398096084595, "logits_per_char": -0.5808199048042297, "num_chars": 2}, {"sum_logits": -1.4220808744430542, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.4220808744430542, "logits_per_char": -0.7110404372215271, "num_chars": 2}, {"sum_logits": -1.740214467048645, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.740214467048645, "logits_per_char": -0.8701072335243225, "num_chars": 2}, {"sum_logits": -1.3336248397827148, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.3336248397827148, "logits_per_char": -0.6668124198913574, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6027952432632446, "incorrect_loss_raw": 1.3932147026062012, "correct_loss_per_char": 0.8013976216316223, "incorrect_loss_per_char": 0.6966073513031006, "correct_loss_per_token": 1.6027952432632446, "incorrect_loss_per_token": 1.3932147026062012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.121665596961975, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.121665596961975, "logits_per_char": -0.5608327984809875, "num_chars": 2}, {"sum_logits": -1.1669390201568604, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1669390201568604, "logits_per_char": -0.5834695100784302, "num_chars": 2}, {"sum_logits": -1.891039490699768, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.891039490699768, "logits_per_char": -0.945519745349884, "num_chars": 2}, {"sum_logits": -1.6027952432632446, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.6027952432632446, "logits_per_char": -0.8013976216316223, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5506623983383179, "incorrect_loss_raw": 1.4078171650568645, "correct_loss_per_char": 0.7753311991691589, "incorrect_loss_per_char": 0.7039085825284322, "correct_loss_per_token": 1.5506623983383179, "incorrect_loss_per_token": 1.4078171650568645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.174359679222107, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.174359679222107, "logits_per_char": -0.5871798396110535, "num_chars": 2}, {"sum_logits": -1.1303974390029907, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.1303974390029907, "logits_per_char": -0.5651987195014954, "num_chars": 2}, {"sum_logits": -1.9186943769454956, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9186943769454956, "logits_per_char": -0.9593471884727478, "num_chars": 2}, {"sum_logits": -1.5506623983383179, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.5506623983383179, "logits_per_char": -0.7753311991691589, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2553468942642212, "incorrect_loss_raw": 1.49445374806722, "correct_loss_per_char": 0.6276734471321106, "incorrect_loss_per_char": 0.74722687403361, "correct_loss_per_token": 1.2553468942642212, "incorrect_loss_per_token": 1.49445374806722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1765568256378174, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.1765568256378174, "logits_per_char": -0.5882784128189087, "num_chars": 2}, {"sum_logits": -1.2553468942642212, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2553468942642212, "logits_per_char": -0.6276734471321106, "num_chars": 2}, {"sum_logits": -1.930762529373169, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.930762529373169, "logits_per_char": -0.9653812646865845, "num_chars": 2}, {"sum_logits": -1.3760418891906738, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.3760418891906738, "logits_per_char": -0.6880209445953369, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0516976118087769, "incorrect_loss_raw": 1.599512775739034, "correct_loss_per_char": 0.5258488059043884, "incorrect_loss_per_char": 0.799756387869517, "correct_loss_per_token": 1.0516976118087769, "incorrect_loss_per_token": 1.599512775739034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0516976118087769, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.0516976118087769, "logits_per_char": -0.5258488059043884, "num_chars": 2}, {"sum_logits": -1.1654272079467773, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1654272079467773, "logits_per_char": -0.5827136039733887, "num_chars": 2}, {"sum_logits": -1.9768881797790527, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.9768881797790527, "logits_per_char": -0.9884440898895264, "num_chars": 2}, {"sum_logits": -1.656222939491272, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.656222939491272, "logits_per_char": -0.828111469745636, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.65602445602417, "incorrect_loss_raw": 1.3284662167231243, "correct_loss_per_char": 0.828012228012085, "incorrect_loss_per_char": 0.6642331083615621, "correct_loss_per_token": 1.65602445602417, "incorrect_loss_per_token": 1.3284662167231243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1437592506408691, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -1.1437592506408691, "logits_per_char": -0.5718796253204346, "num_chars": 2}, {"sum_logits": -1.4193212985992432, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.4193212985992432, "logits_per_char": -0.7096606492996216, "num_chars": 2}, {"sum_logits": -1.65602445602417, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.65602445602417, "logits_per_char": -0.828012228012085, "num_chars": 2}, {"sum_logits": -1.4223181009292603, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.4223181009292603, "logits_per_char": -0.7111590504646301, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6692845821380615, "incorrect_loss_raw": 1.400137186050415, "correct_loss_per_char": 0.8346422910690308, "incorrect_loss_per_char": 0.7000685930252075, "correct_loss_per_token": 1.6692845821380615, "incorrect_loss_per_token": 1.400137186050415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9721946716308594, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -0.9721946716308594, "logits_per_char": -0.4860973358154297, "num_chars": 2}, {"sum_logits": -1.2467643022537231, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.2467643022537231, "logits_per_char": -0.6233821511268616, "num_chars": 2}, {"sum_logits": -1.9814525842666626, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.9814525842666626, "logits_per_char": -0.9907262921333313, "num_chars": 2}, {"sum_logits": -1.6692845821380615, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.6692845821380615, "logits_per_char": -0.8346422910690308, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7667735815048218, "incorrect_loss_raw": 1.3545835415522258, "correct_loss_per_char": 0.8833867907524109, "incorrect_loss_per_char": 0.6772917707761129, "correct_loss_per_token": 1.7667735815048218, "incorrect_loss_per_token": 1.3545835415522258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0748950242996216, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -1.0748950242996216, "logits_per_char": -0.5374475121498108, "num_chars": 2}, {"sum_logits": -1.1259851455688477, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.1259851455688477, "logits_per_char": -0.5629925727844238, "num_chars": 2}, {"sum_logits": -1.862870454788208, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.862870454788208, "logits_per_char": -0.931435227394104, "num_chars": 2}, {"sum_logits": -1.7667735815048218, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.7667735815048218, "logits_per_char": -0.8833867907524109, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.585842251777649, "incorrect_loss_raw": 1.418111006418864, "correct_loss_per_char": 0.7929211258888245, "incorrect_loss_per_char": 0.709055503209432, "correct_loss_per_token": 1.585842251777649, "incorrect_loss_per_token": 1.418111006418864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0783635377883911, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.0783635377883911, "logits_per_char": -0.5391817688941956, "num_chars": 2}, {"sum_logits": -1.1664899587631226, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.1664899587631226, "logits_per_char": -0.5832449793815613, "num_chars": 2}, {"sum_logits": -2.009479522705078, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -2.009479522705078, "logits_per_char": -1.004739761352539, "num_chars": 2}, {"sum_logits": -1.585842251777649, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.585842251777649, "logits_per_char": -0.7929211258888245, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7118737697601318, "incorrect_loss_raw": 1.3138702313105266, "correct_loss_per_char": 0.8559368848800659, "incorrect_loss_per_char": 0.6569351156552633, "correct_loss_per_token": 1.7118737697601318, "incorrect_loss_per_token": 1.3138702313105266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3253123760223389, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.3253123760223389, "logits_per_char": -0.6626561880111694, "num_chars": 2}, {"sum_logits": -1.157899022102356, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -1.157899022102356, "logits_per_char": -0.578949511051178, "num_chars": 2}, {"sum_logits": -1.7118737697601318, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.7118737697601318, "logits_per_char": -0.8559368848800659, "num_chars": 2}, {"sum_logits": -1.4583992958068848, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.4583992958068848, "logits_per_char": -0.7291996479034424, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.968984842300415, "incorrect_loss_raw": 1.2872065703074138, "correct_loss_per_char": 0.9844924211502075, "incorrect_loss_per_char": 0.6436032851537069, "correct_loss_per_token": 1.968984842300415, "incorrect_loss_per_token": 1.2872065703074138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0275824069976807, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -1.0275824069976807, "logits_per_char": -0.5137912034988403, "num_chars": 2}, {"sum_logits": -1.2277815341949463, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.2277815341949463, "logits_per_char": -0.6138907670974731, "num_chars": 2}, {"sum_logits": -1.968984842300415, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.968984842300415, "logits_per_char": -0.9844924211502075, "num_chars": 2}, {"sum_logits": -1.6062557697296143, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.6062557697296143, "logits_per_char": -0.8031278848648071, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.613152265548706, "incorrect_loss_raw": 1.339435299237569, "correct_loss_per_char": 0.806576132774353, "incorrect_loss_per_char": 0.6697176496187845, "correct_loss_per_token": 1.613152265548706, "incorrect_loss_per_token": 1.339435299237569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.291985273361206, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.291985273361206, "logits_per_char": -0.645992636680603, "num_chars": 2}, {"sum_logits": -1.2035013437271118, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -1.2035013437271118, "logits_per_char": -0.6017506718635559, "num_chars": 2}, {"sum_logits": -1.613152265548706, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.613152265548706, "logits_per_char": -0.806576132774353, "num_chars": 2}, {"sum_logits": -1.5228192806243896, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.5228192806243896, "logits_per_char": -0.7614096403121948, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2673343420028687, "incorrect_loss_raw": 1.4512006441752117, "correct_loss_per_char": 0.6336671710014343, "incorrect_loss_per_char": 0.7256003220876058, "correct_loss_per_token": 1.2673343420028687, "incorrect_loss_per_token": 1.4512006441752117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2673343420028687, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.2673343420028687, "logits_per_char": -0.6336671710014343, "num_chars": 2}, {"sum_logits": -1.2594839334487915, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.2594839334487915, "logits_per_char": -0.6297419667243958, "num_chars": 2}, {"sum_logits": -1.5937275886535645, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5937275886535645, "logits_per_char": -0.7968637943267822, "num_chars": 2}, {"sum_logits": -1.5003904104232788, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5003904104232788, "logits_per_char": -0.7501952052116394, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6157886981964111, "incorrect_loss_raw": 1.3843266169230144, "correct_loss_per_char": 0.8078943490982056, "incorrect_loss_per_char": 0.6921633084615072, "correct_loss_per_token": 1.6157886981964111, "incorrect_loss_per_token": 1.3843266169230144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1237170696258545, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.1237170696258545, "logits_per_char": -0.5618585348129272, "num_chars": 2}, {"sum_logits": -1.1760739088058472, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.1760739088058472, "logits_per_char": -0.5880369544029236, "num_chars": 2}, {"sum_logits": -1.8531888723373413, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8531888723373413, "logits_per_char": -0.9265944361686707, "num_chars": 2}, {"sum_logits": -1.6157886981964111, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.6157886981964111, "logits_per_char": -0.8078943490982056, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0384546518325806, "incorrect_loss_raw": 1.5907532374064128, "correct_loss_per_char": 0.5192273259162903, "incorrect_loss_per_char": 0.7953766187032064, "correct_loss_per_token": 1.0384546518325806, "incorrect_loss_per_token": 1.5907532374064128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0384546518325806, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.0384546518325806, "logits_per_char": -0.5192273259162903, "num_chars": 2}, {"sum_logits": -1.2228281497955322, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2228281497955322, "logits_per_char": -0.6114140748977661, "num_chars": 2}, {"sum_logits": -1.944819688796997, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.944819688796997, "logits_per_char": -0.9724098443984985, "num_chars": 2}, {"sum_logits": -1.604611873626709, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.604611873626709, "logits_per_char": -0.8023059368133545, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4668903350830078, "incorrect_loss_raw": 1.3917600711186726, "correct_loss_per_char": 0.7334451675415039, "incorrect_loss_per_char": 0.6958800355593363, "correct_loss_per_token": 1.4668903350830078, "incorrect_loss_per_token": 1.3917600711186726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.176949143409729, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -1.176949143409729, "logits_per_char": -0.5884745717048645, "num_chars": 2}, {"sum_logits": -1.3402167558670044, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.3402167558670044, "logits_per_char": -0.6701083779335022, "num_chars": 2}, {"sum_logits": -1.6581143140792847, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.6581143140792847, "logits_per_char": -0.8290571570396423, "num_chars": 2}, {"sum_logits": -1.4668903350830078, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.4668903350830078, "logits_per_char": -0.7334451675415039, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0080138444900513, "incorrect_loss_raw": 1.6147820949554443, "correct_loss_per_char": 0.5040069222450256, "incorrect_loss_per_char": 0.8073910474777222, "correct_loss_per_token": 1.0080138444900513, "incorrect_loss_per_token": 1.6147820949554443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0080138444900513, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -1.0080138444900513, "logits_per_char": -0.5040069222450256, "num_chars": 2}, {"sum_logits": -1.2330642938613892, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.2330642938613892, "logits_per_char": -0.6165321469306946, "num_chars": 2}, {"sum_logits": -2.00358247756958, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -2.00358247756958, "logits_per_char": -1.00179123878479, "num_chars": 2}, {"sum_logits": -1.6076995134353638, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.6076995134353638, "logits_per_char": -0.8038497567176819, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9813182353973389, "incorrect_loss_raw": 1.6438794136047363, "correct_loss_per_char": 0.49065911769866943, "incorrect_loss_per_char": 0.8219397068023682, "correct_loss_per_token": 0.9813182353973389, "incorrect_loss_per_token": 1.6438794136047363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9813182353973389, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.9813182353973389, "logits_per_char": -0.49065911769866943, "num_chars": 2}, {"sum_logits": -1.2066469192504883, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2066469192504883, "logits_per_char": -0.6033234596252441, "num_chars": 2}, {"sum_logits": -2.080082893371582, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -2.080082893371582, "logits_per_char": -1.040041446685791, "num_chars": 2}, {"sum_logits": -1.6449084281921387, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.6449084281921387, "logits_per_char": -0.8224542140960693, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4922659397125244, "incorrect_loss_raw": 1.3854986031850178, "correct_loss_per_char": 0.7461329698562622, "incorrect_loss_per_char": 0.6927493015925089, "correct_loss_per_token": 1.4922659397125244, "incorrect_loss_per_token": 1.3854986031850178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.090980887413025, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -1.090980887413025, "logits_per_char": -0.5454904437065125, "num_chars": 2}, {"sum_logits": -1.4922659397125244, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.4922659397125244, "logits_per_char": -0.7461329698562622, "num_chars": 2}, {"sum_logits": -1.5740585327148438, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.5740585327148438, "logits_per_char": -0.7870292663574219, "num_chars": 2}, {"sum_logits": -1.491456389427185, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.491456389427185, "logits_per_char": -0.7457281947135925, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3856570720672607, "incorrect_loss_raw": 1.42922043800354, "correct_loss_per_char": 0.6928285360336304, "incorrect_loss_per_char": 0.71461021900177, "correct_loss_per_token": 1.3856570720672607, "incorrect_loss_per_token": 1.42922043800354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1255465745925903, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -1.1255465745925903, "logits_per_char": -0.5627732872962952, "num_chars": 2}, {"sum_logits": -1.3856570720672607, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.3856570720672607, "logits_per_char": -0.6928285360336304, "num_chars": 2}, {"sum_logits": -1.7550115585327148, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.7550115585327148, "logits_per_char": -0.8775057792663574, "num_chars": 2}, {"sum_logits": -1.407103180885315, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.407103180885315, "logits_per_char": -0.7035515904426575, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.121971607208252, "incorrect_loss_raw": 1.5504441658655803, "correct_loss_per_char": 0.560985803604126, "incorrect_loss_per_char": 0.7752220829327902, "correct_loss_per_token": 1.121971607208252, "incorrect_loss_per_token": 1.5504441658655803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.121971607208252, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.121971607208252, "logits_per_char": -0.560985803604126, "num_chars": 2}, {"sum_logits": -1.177258849143982, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.177258849143982, "logits_per_char": -0.588629424571991, "num_chars": 2}, {"sum_logits": -1.9217551946640015, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.9217551946640015, "logits_per_char": -0.9608775973320007, "num_chars": 2}, {"sum_logits": -1.5523184537887573, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.5523184537887573, "logits_per_char": -0.7761592268943787, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1537175178527832, "incorrect_loss_raw": 1.5526338418324788, "correct_loss_per_char": 0.5768587589263916, "incorrect_loss_per_char": 0.7763169209162394, "correct_loss_per_token": 1.1537175178527832, "incorrect_loss_per_token": 1.5526338418324788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0855975151062012, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.0855975151062012, "logits_per_char": -0.5427987575531006, "num_chars": 2}, {"sum_logits": -1.1537175178527832, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.1537175178527832, "logits_per_char": -0.5768587589263916, "num_chars": 2}, {"sum_logits": -1.9253499507904053, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.9253499507904053, "logits_per_char": -0.9626749753952026, "num_chars": 2}, {"sum_logits": -1.64695405960083, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.64695405960083, "logits_per_char": -0.823477029800415, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5018030405044556, "incorrect_loss_raw": 1.3971078793207805, "correct_loss_per_char": 0.7509015202522278, "incorrect_loss_per_char": 0.6985539396603903, "correct_loss_per_token": 1.5018030405044556, "incorrect_loss_per_token": 1.3971078793207805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2189327478408813, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.2189327478408813, "logits_per_char": -0.6094663739204407, "num_chars": 2}, {"sum_logits": -1.1854491233825684, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -1.1854491233825684, "logits_per_char": -0.5927245616912842, "num_chars": 2}, {"sum_logits": -1.7869417667388916, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.7869417667388916, "logits_per_char": -0.8934708833694458, "num_chars": 2}, {"sum_logits": -1.5018030405044556, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.5018030405044556, "logits_per_char": -0.7509015202522278, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9129464626312256, "incorrect_loss_raw": 1.2868771155675252, "correct_loss_per_char": 0.9564732313156128, "incorrect_loss_per_char": 0.6434385577837626, "correct_loss_per_token": 1.9129464626312256, "incorrect_loss_per_token": 1.2868771155675252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.121539831161499, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.121539831161499, "logits_per_char": -0.5607699155807495, "num_chars": 2}, {"sum_logits": -1.167786955833435, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.167786955833435, "logits_per_char": -0.5838934779167175, "num_chars": 2}, {"sum_logits": -1.9129464626312256, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.9129464626312256, "logits_per_char": -0.9564732313156128, "num_chars": 2}, {"sum_logits": -1.5713045597076416, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.5713045597076416, "logits_per_char": -0.7856522798538208, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0654351711273193, "incorrect_loss_raw": 1.5366192261377971, "correct_loss_per_char": 0.5327175855636597, "incorrect_loss_per_char": 0.7683096130688986, "correct_loss_per_token": 1.0654351711273193, "incorrect_loss_per_token": 1.5366192261377971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0654351711273193, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.0654351711273193, "logits_per_char": -0.5327175855636597, "num_chars": 2}, {"sum_logits": -1.4243844747543335, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4243844747543335, "logits_per_char": -0.7121922373771667, "num_chars": 2}, {"sum_logits": -1.6842066049575806, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6842066049575806, "logits_per_char": -0.8421033024787903, "num_chars": 2}, {"sum_logits": -1.501266598701477, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.501266598701477, "logits_per_char": -0.7506332993507385, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8930879831314087, "incorrect_loss_raw": 1.3601315816243489, "correct_loss_per_char": 0.9465439915657043, "incorrect_loss_per_char": 0.6800657908121744, "correct_loss_per_token": 1.8930879831314087, "incorrect_loss_per_token": 1.3601315816243489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8808161020278931, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -0.8808161020278931, "logits_per_char": -0.44040805101394653, "num_chars": 2}, {"sum_logits": -1.3083150386810303, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.3083150386810303, "logits_per_char": -0.6541575193405151, "num_chars": 2}, {"sum_logits": -1.8930879831314087, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.8930879831314087, "logits_per_char": -0.9465439915657043, "num_chars": 2}, {"sum_logits": -1.8912636041641235, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.8912636041641235, "logits_per_char": -0.9456318020820618, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.192358374595642, "incorrect_loss_raw": 1.5118661721547444, "correct_loss_per_char": 0.596179187297821, "incorrect_loss_per_char": 0.7559330860773722, "correct_loss_per_token": 1.192358374595642, "incorrect_loss_per_token": 1.5118661721547444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.192358374595642, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.192358374595642, "logits_per_char": -0.596179187297821, "num_chars": 2}, {"sum_logits": -1.2065277099609375, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.2065277099609375, "logits_per_char": -0.6032638549804688, "num_chars": 2}, {"sum_logits": -1.848181962966919, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.848181962966919, "logits_per_char": -0.9240909814834595, "num_chars": 2}, {"sum_logits": -1.480888843536377, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.480888843536377, "logits_per_char": -0.7404444217681885, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3386249542236328, "incorrect_loss_raw": 1.4426207939783733, "correct_loss_per_char": 0.6693124771118164, "incorrect_loss_per_char": 0.7213103969891866, "correct_loss_per_token": 1.3386249542236328, "incorrect_loss_per_token": 1.4426207939783733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1369119882583618, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.1369119882583618, "logits_per_char": -0.5684559941291809, "num_chars": 2}, {"sum_logits": -1.3386249542236328, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3386249542236328, "logits_per_char": -0.6693124771118164, "num_chars": 2}, {"sum_logits": -1.7147223949432373, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.7147223949432373, "logits_per_char": -0.8573611974716187, "num_chars": 2}, {"sum_logits": -1.4762279987335205, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4762279987335205, "logits_per_char": -0.7381139993667603, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0632908344268799, "incorrect_loss_raw": 1.5724085966746013, "correct_loss_per_char": 0.5316454172134399, "incorrect_loss_per_char": 0.7862042983373007, "correct_loss_per_token": 1.0632908344268799, "incorrect_loss_per_token": 1.5724085966746013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0632908344268799, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.0632908344268799, "logits_per_char": -0.5316454172134399, "num_chars": 2}, {"sum_logits": -1.2545461654663086, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2545461654663086, "logits_per_char": -0.6272730827331543, "num_chars": 2}, {"sum_logits": -1.9277633428573608, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.9277633428573608, "logits_per_char": -0.9638816714286804, "num_chars": 2}, {"sum_logits": -1.5349162817001343, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.5349162817001343, "logits_per_char": -0.7674581408500671, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0814409255981445, "incorrect_loss_raw": 1.5742484331130981, "correct_loss_per_char": 0.5407204627990723, "incorrect_loss_per_char": 0.7871242165565491, "correct_loss_per_token": 1.0814409255981445, "incorrect_loss_per_token": 1.5742484331130981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0814409255981445, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0814409255981445, "logits_per_char": -0.5407204627990723, "num_chars": 2}, {"sum_logits": -1.2391934394836426, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2391934394836426, "logits_per_char": -0.6195967197418213, "num_chars": 2}, {"sum_logits": -1.9880532026290894, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9880532026290894, "logits_per_char": -0.9940266013145447, "num_chars": 2}, {"sum_logits": -1.4954986572265625, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.4954986572265625, "logits_per_char": -0.7477493286132812, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5738916397094727, "incorrect_loss_raw": 1.4029104312260945, "correct_loss_per_char": 0.7869458198547363, "incorrect_loss_per_char": 0.7014552156130472, "correct_loss_per_token": 1.5738916397094727, "incorrect_loss_per_token": 1.4029104312260945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0393468141555786, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0393468141555786, "logits_per_char": -0.5196734070777893, "num_chars": 2}, {"sum_logits": -1.276390552520752, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.276390552520752, "logits_per_char": -0.638195276260376, "num_chars": 2}, {"sum_logits": -1.8929939270019531, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8929939270019531, "logits_per_char": -0.9464969635009766, "num_chars": 2}, {"sum_logits": -1.5738916397094727, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.5738916397094727, "logits_per_char": -0.7869458198547363, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0382404327392578, "incorrect_loss_raw": 1.5511136849721272, "correct_loss_per_char": 0.5191202163696289, "incorrect_loss_per_char": 0.7755568424860636, "correct_loss_per_token": 1.0382404327392578, "incorrect_loss_per_token": 1.5511136849721272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0382404327392578, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -1.0382404327392578, "logits_per_char": -0.5191202163696289, "num_chars": 2}, {"sum_logits": -1.4434627294540405, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.4434627294540405, "logits_per_char": -0.7217313647270203, "num_chars": 2}, {"sum_logits": -1.6502699851989746, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.6502699851989746, "logits_per_char": -0.8251349925994873, "num_chars": 2}, {"sum_logits": -1.5596083402633667, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5596083402633667, "logits_per_char": -0.7798041701316833, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.002140998840332, "incorrect_loss_raw": 1.6250821352005005, "correct_loss_per_char": 0.501070499420166, "incorrect_loss_per_char": 0.8125410676002502, "correct_loss_per_token": 1.002140998840332, "incorrect_loss_per_token": 1.6250821352005005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.002140998840332, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -1.002140998840332, "logits_per_char": -0.501070499420166, "num_chars": 2}, {"sum_logits": -1.1839066743850708, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.1839066743850708, "logits_per_char": -0.5919533371925354, "num_chars": 2}, {"sum_logits": -1.9493169784545898, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.9493169784545898, "logits_per_char": -0.9746584892272949, "num_chars": 2}, {"sum_logits": -1.7420227527618408, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.7420227527618408, "logits_per_char": -0.8710113763809204, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0651531219482422, "incorrect_loss_raw": 1.5829127232233684, "correct_loss_per_char": 0.5325765609741211, "incorrect_loss_per_char": 0.7914563616116842, "correct_loss_per_token": 1.0651531219482422, "incorrect_loss_per_token": 1.5829127232233684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0651531219482422, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0651531219482422, "logits_per_char": -0.5325765609741211, "num_chars": 2}, {"sum_logits": -1.1995923519134521, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1995923519134521, "logits_per_char": -0.5997961759567261, "num_chars": 2}, {"sum_logits": -1.9549227952957153, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.9549227952957153, "logits_per_char": -0.9774613976478577, "num_chars": 2}, {"sum_logits": -1.5942230224609375, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.5942230224609375, "logits_per_char": -0.7971115112304688, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9793058633804321, "incorrect_loss_raw": 1.5882453123728435, "correct_loss_per_char": 0.48965293169021606, "incorrect_loss_per_char": 0.7941226561864217, "correct_loss_per_token": 0.9793058633804321, "incorrect_loss_per_token": 1.5882453123728435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9793058633804321, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.9793058633804321, "logits_per_char": -0.48965293169021606, "num_chars": 2}, {"sum_logits": -1.5028012990951538, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.5028012990951538, "logits_per_char": -0.7514006495475769, "num_chars": 2}, {"sum_logits": -1.7642217874526978, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.7642217874526978, "logits_per_char": -0.8821108937263489, "num_chars": 2}, {"sum_logits": -1.4977128505706787, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.4977128505706787, "logits_per_char": -0.7488564252853394, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0779756307601929, "incorrect_loss_raw": 1.5590898593266804, "correct_loss_per_char": 0.5389878153800964, "incorrect_loss_per_char": 0.7795449296633402, "correct_loss_per_token": 1.0779756307601929, "incorrect_loss_per_token": 1.5590898593266804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0779756307601929, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.0779756307601929, "logits_per_char": -0.5389878153800964, "num_chars": 2}, {"sum_logits": -1.2816507816314697, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2816507816314697, "logits_per_char": -0.6408253908157349, "num_chars": 2}, {"sum_logits": -1.8868046998977661, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.8868046998977661, "logits_per_char": -0.9434023499488831, "num_chars": 2}, {"sum_logits": -1.5088140964508057, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5088140964508057, "logits_per_char": -0.7544070482254028, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.951417088508606, "incorrect_loss_raw": 1.6529898246129353, "correct_loss_per_char": 0.475708544254303, "incorrect_loss_per_char": 0.8264949123064677, "correct_loss_per_token": 0.951417088508606, "incorrect_loss_per_token": 1.6529898246129353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.951417088508606, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.951417088508606, "logits_per_char": -0.475708544254303, "num_chars": 2}, {"sum_logits": -1.2409982681274414, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.2409982681274414, "logits_per_char": -0.6204991340637207, "num_chars": 2}, {"sum_logits": -1.9717756509780884, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.9717756509780884, "logits_per_char": -0.9858878254890442, "num_chars": 2}, {"sum_logits": -1.7461955547332764, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.7461955547332764, "logits_per_char": -0.8730977773666382, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2227065563201904, "incorrect_loss_raw": 1.4784795045852661, "correct_loss_per_char": 0.6113532781600952, "incorrect_loss_per_char": 0.7392397522926331, "correct_loss_per_token": 1.2227065563201904, "incorrect_loss_per_token": 1.4784795045852661, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2748874425888062, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.2748874425888062, "logits_per_char": -0.6374437212944031, "num_chars": 2}, {"sum_logits": -1.2227065563201904, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -1.2227065563201904, "logits_per_char": -0.6113532781600952, "num_chars": 2}, {"sum_logits": -1.754251480102539, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.754251480102539, "logits_per_char": -0.8771257400512695, "num_chars": 2}, {"sum_logits": -1.4062995910644531, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.4062995910644531, "logits_per_char": -0.7031497955322266, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9291651248931885, "incorrect_loss_raw": 1.2731505235036213, "correct_loss_per_char": 0.9645825624465942, "incorrect_loss_per_char": 0.6365752617518107, "correct_loss_per_token": 1.9291651248931885, "incorrect_loss_per_token": 1.2731505235036213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.119957685470581, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.119957685470581, "logits_per_char": -0.5599788427352905, "num_chars": 2}, {"sum_logits": -1.3522429466247559, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.3522429466247559, "logits_per_char": -0.6761214733123779, "num_chars": 2}, {"sum_logits": -1.9291651248931885, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.9291651248931885, "logits_per_char": -0.9645825624465942, "num_chars": 2}, {"sum_logits": -1.3472509384155273, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.3472509384155273, "logits_per_char": -0.6736254692077637, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8245912790298462, "incorrect_loss_raw": 1.3062504927317302, "correct_loss_per_char": 0.9122956395149231, "incorrect_loss_per_char": 0.6531252463658651, "correct_loss_per_token": 1.8245912790298462, "incorrect_loss_per_token": 1.3062504927317302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0662109851837158, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -1.0662109851837158, "logits_per_char": -0.5331054925918579, "num_chars": 2}, {"sum_logits": -1.252981424331665, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.252981424331665, "logits_per_char": -0.6264907121658325, "num_chars": 2}, {"sum_logits": -1.8245912790298462, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.8245912790298462, "logits_per_char": -0.9122956395149231, "num_chars": 2}, {"sum_logits": -1.5995590686798096, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.5995590686798096, "logits_per_char": -0.7997795343399048, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3428714275360107, "incorrect_loss_raw": 1.4955972234408061, "correct_loss_per_char": 0.6714357137680054, "incorrect_loss_per_char": 0.7477986117204031, "correct_loss_per_token": 1.3428714275360107, "incorrect_loss_per_token": 1.4955972234408061, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9713407158851624, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": true, "logits_per_token": -0.9713407158851624, "logits_per_char": -0.4856703579425812, "num_chars": 2}, {"sum_logits": -1.3428714275360107, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.3428714275360107, "logits_per_char": -0.6714357137680054, "num_chars": 2}, {"sum_logits": -1.953674077987671, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.953674077987671, "logits_per_char": -0.9768370389938354, "num_chars": 2}, {"sum_logits": -1.561776876449585, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.561776876449585, "logits_per_char": -0.7808884382247925, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8140064477920532, "incorrect_loss_raw": 1.3495745261510212, "correct_loss_per_char": 0.9070032238960266, "incorrect_loss_per_char": 0.6747872630755106, "correct_loss_per_token": 1.8140064477920532, "incorrect_loss_per_token": 1.3495745261510212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0851377248764038, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.0851377248764038, "logits_per_char": -0.5425688624382019, "num_chars": 2}, {"sum_logits": -1.0867700576782227, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.0867700576782227, "logits_per_char": -0.5433850288391113, "num_chars": 2}, {"sum_logits": -1.8768157958984375, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8768157958984375, "logits_per_char": -0.9384078979492188, "num_chars": 2}, {"sum_logits": -1.8140064477920532, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8140064477920532, "logits_per_char": -0.9070032238960266, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9318090677261353, "incorrect_loss_raw": 1.6780608495076497, "correct_loss_per_char": 0.4659045338630676, "incorrect_loss_per_char": 0.8390304247538248, "correct_loss_per_token": 0.9318090677261353, "incorrect_loss_per_token": 1.6780608495076497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9318090677261353, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -0.9318090677261353, "logits_per_char": -0.4659045338630676, "num_chars": 2}, {"sum_logits": -1.2232515811920166, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.2232515811920166, "logits_per_char": -0.6116257905960083, "num_chars": 2}, {"sum_logits": -2.0937929153442383, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -2.0937929153442383, "logits_per_char": -1.0468964576721191, "num_chars": 2}, {"sum_logits": -1.7171380519866943, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.7171380519866943, "logits_per_char": -0.8585690259933472, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.109712839126587, "incorrect_loss_raw": 1.7008736928304036, "correct_loss_per_char": 0.5548564195632935, "incorrect_loss_per_char": 0.8504368464152018, "correct_loss_per_token": 1.109712839126587, "incorrect_loss_per_token": 1.7008736928304036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8902928829193115, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -0.8902928829193115, "logits_per_char": -0.44514644145965576, "num_chars": 2}, {"sum_logits": -1.109712839126587, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.109712839126587, "logits_per_char": -0.5548564195632935, "num_chars": 2}, {"sum_logits": -2.204378366470337, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -2.204378366470337, "logits_per_char": -1.1021891832351685, "num_chars": 2}, {"sum_logits": -2.0079498291015625, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -2.0079498291015625, "logits_per_char": -1.0039749145507812, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2047951221466064, "incorrect_loss_raw": 1.5331969658533733, "correct_loss_per_char": 0.6023975610733032, "incorrect_loss_per_char": 0.7665984829266866, "correct_loss_per_token": 1.2047951221466064, "incorrect_loss_per_token": 1.5331969658533733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.082995057106018, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.082995057106018, "logits_per_char": -0.541497528553009, "num_chars": 2}, {"sum_logits": -1.2047951221466064, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.2047951221466064, "logits_per_char": -0.6023975610733032, "num_chars": 2}, {"sum_logits": -1.9731732606887817, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.9731732606887817, "logits_per_char": -0.9865866303443909, "num_chars": 2}, {"sum_logits": -1.5434225797653198, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.5434225797653198, "logits_per_char": -0.7717112898826599, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9564081430435181, "incorrect_loss_raw": 1.6350805759429932, "correct_loss_per_char": 0.47820407152175903, "incorrect_loss_per_char": 0.8175402879714966, "correct_loss_per_token": 0.9564081430435181, "incorrect_loss_per_token": 1.6350805759429932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9564081430435181, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": true, "logits_per_token": -0.9564081430435181, "logits_per_char": -0.47820407152175903, "num_chars": 2}, {"sum_logits": -1.3041664361953735, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.3041664361953735, "logits_per_char": -0.6520832180976868, "num_chars": 2}, {"sum_logits": -1.9259976148605347, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.9259976148605347, "logits_per_char": -0.9629988074302673, "num_chars": 2}, {"sum_logits": -1.6750776767730713, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.6750776767730713, "logits_per_char": -0.8375388383865356, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.476750373840332, "incorrect_loss_raw": 1.3832889795303345, "correct_loss_per_char": 0.738375186920166, "incorrect_loss_per_char": 0.6916444897651672, "correct_loss_per_token": 1.476750373840332, "incorrect_loss_per_token": 1.3832889795303345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1370619535446167, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -1.1370619535446167, "logits_per_char": -0.5685309767723083, "num_chars": 2}, {"sum_logits": -1.476750373840332, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.476750373840332, "logits_per_char": -0.738375186920166, "num_chars": 2}, {"sum_logits": -1.552512764930725, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.552512764930725, "logits_per_char": -0.7762563824653625, "num_chars": 2}, {"sum_logits": -1.4602922201156616, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.4602922201156616, "logits_per_char": -0.7301461100578308, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2768588066101074, "incorrect_loss_raw": 1.4934139649073284, "correct_loss_per_char": 0.6384294033050537, "incorrect_loss_per_char": 0.7467069824536642, "correct_loss_per_token": 1.2768588066101074, "incorrect_loss_per_token": 1.4934139649073284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0818595886230469, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.0818595886230469, "logits_per_char": -0.5409297943115234, "num_chars": 2}, {"sum_logits": -1.2768588066101074, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2768588066101074, "logits_per_char": -0.6384294033050537, "num_chars": 2}, {"sum_logits": -1.91499662399292, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.91499662399292, "logits_per_char": -0.95749831199646, "num_chars": 2}, {"sum_logits": -1.483385682106018, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.483385682106018, "logits_per_char": -0.741692841053009, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2018401622772217, "incorrect_loss_raw": 1.5287100871404011, "correct_loss_per_char": 0.6009200811386108, "incorrect_loss_per_char": 0.7643550435702006, "correct_loss_per_token": 1.2018401622772217, "incorrect_loss_per_token": 1.5287100871404011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.07150137424469, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.07150137424469, "logits_per_char": -0.535750687122345, "num_chars": 2}, {"sum_logits": -1.2018401622772217, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2018401622772217, "logits_per_char": -0.6009200811386108, "num_chars": 2}, {"sum_logits": -1.907834529876709, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.907834529876709, "logits_per_char": -0.9539172649383545, "num_chars": 2}, {"sum_logits": -1.6067943572998047, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.6067943572998047, "logits_per_char": -0.8033971786499023, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1202237606048584, "incorrect_loss_raw": 1.5540224711100261, "correct_loss_per_char": 0.5601118803024292, "incorrect_loss_per_char": 0.7770112355550131, "correct_loss_per_token": 1.1202237606048584, "incorrect_loss_per_token": 1.5540224711100261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1202237606048584, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -1.1202237606048584, "logits_per_char": -0.5601118803024292, "num_chars": 2}, {"sum_logits": -1.156306505203247, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.156306505203247, "logits_per_char": -0.5781532526016235, "num_chars": 2}, {"sum_logits": -1.8927345275878906, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.8927345275878906, "logits_per_char": -0.9463672637939453, "num_chars": 2}, {"sum_logits": -1.6130263805389404, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.6130263805389404, "logits_per_char": -0.8065131902694702, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5740951299667358, "incorrect_loss_raw": 1.3424687782923381, "correct_loss_per_char": 0.7870475649833679, "incorrect_loss_per_char": 0.6712343891461691, "correct_loss_per_token": 1.5740951299667358, "incorrect_loss_per_token": 1.3424687782923381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2845542430877686, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -1.2845542430877686, "logits_per_char": -0.6422771215438843, "num_chars": 2}, {"sum_logits": -1.307951807975769, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.307951807975769, "logits_per_char": -0.6539759039878845, "num_chars": 2}, {"sum_logits": -1.5740951299667358, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.5740951299667358, "logits_per_char": -0.7870475649833679, "num_chars": 2}, {"sum_logits": -1.4349002838134766, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.4349002838134766, "logits_per_char": -0.7174501419067383, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.110048770904541, "incorrect_loss_raw": 1.2968192100524902, "correct_loss_per_char": 1.0550243854522705, "incorrect_loss_per_char": 0.6484096050262451, "correct_loss_per_token": 2.110048770904541, "incorrect_loss_per_token": 1.2968192100524902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.921799898147583, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": true, "logits_per_token": -0.921799898147583, "logits_per_char": -0.4608999490737915, "num_chars": 2}, {"sum_logits": -1.2049481868743896, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.2049481868743896, "logits_per_char": -0.6024740934371948, "num_chars": 2}, {"sum_logits": -2.110048770904541, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -2.110048770904541, "logits_per_char": -1.0550243854522705, "num_chars": 2}, {"sum_logits": -1.763709545135498, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.763709545135498, "logits_per_char": -0.881854772567749, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1642576456069946, "incorrect_loss_raw": 1.545336922009786, "correct_loss_per_char": 0.5821288228034973, "incorrect_loss_per_char": 0.772668461004893, "correct_loss_per_token": 1.1642576456069946, "incorrect_loss_per_token": 1.545336922009786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0854556560516357, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.0854556560516357, "logits_per_char": -0.5427278280258179, "num_chars": 2}, {"sum_logits": -1.1642576456069946, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.1642576456069946, "logits_per_char": -0.5821288228034973, "num_chars": 2}, {"sum_logits": -1.9466618299484253, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.9466618299484253, "logits_per_char": -0.9733309149742126, "num_chars": 2}, {"sum_logits": -1.6038932800292969, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.6038932800292969, "logits_per_char": -0.8019466400146484, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2358849048614502, "incorrect_loss_raw": 1.477030078570048, "correct_loss_per_char": 0.6179424524307251, "incorrect_loss_per_char": 0.738515039285024, "correct_loss_per_token": 1.2358849048614502, "incorrect_loss_per_token": 1.477030078570048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2358849048614502, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.2358849048614502, "logits_per_char": -0.6179424524307251, "num_chars": 2}, {"sum_logits": -1.1893303394317627, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -1.1893303394317627, "logits_per_char": -0.5946651697158813, "num_chars": 2}, {"sum_logits": -1.7444243431091309, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7444243431091309, "logits_per_char": -0.8722121715545654, "num_chars": 2}, {"sum_logits": -1.4973355531692505, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.4973355531692505, "logits_per_char": -0.7486677765846252, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.095259189605713, "incorrect_loss_raw": 1.5537411371866863, "correct_loss_per_char": 0.5476295948028564, "incorrect_loss_per_char": 0.7768705685933431, "correct_loss_per_token": 1.095259189605713, "incorrect_loss_per_token": 1.5537411371866863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.095259189605713, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.095259189605713, "logits_per_char": -0.5476295948028564, "num_chars": 2}, {"sum_logits": -1.2229206562042236, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2229206562042236, "logits_per_char": -0.6114603281021118, "num_chars": 2}, {"sum_logits": -1.8099377155303955, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.8099377155303955, "logits_per_char": -0.9049688577651978, "num_chars": 2}, {"sum_logits": -1.6283650398254395, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.6283650398254395, "logits_per_char": -0.8141825199127197, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 152, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0116212368011475, "incorrect_loss_raw": 1.6008944511413574, "correct_loss_per_char": 0.5058106184005737, "incorrect_loss_per_char": 0.8004472255706787, "correct_loss_per_token": 1.0116212368011475, "incorrect_loss_per_token": 1.6008944511413574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0116212368011475, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -1.0116212368011475, "logits_per_char": -0.5058106184005737, "num_chars": 2}, {"sum_logits": -1.2307679653167725, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.2307679653167725, "logits_per_char": -0.6153839826583862, "num_chars": 2}, {"sum_logits": -1.863795280456543, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.863795280456543, "logits_per_char": -0.9318976402282715, "num_chars": 2}, {"sum_logits": -1.7081201076507568, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.7081201076507568, "logits_per_char": -0.8540600538253784, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 153, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9142354726791382, "incorrect_loss_raw": 1.380924940109253, "correct_loss_per_char": 0.9571177363395691, "incorrect_loss_per_char": 0.6904624700546265, "correct_loss_per_token": 1.9142354726791382, "incorrect_loss_per_token": 1.380924940109253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.881995439529419, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -0.881995439529419, "logits_per_char": -0.4409977197647095, "num_chars": 2}, {"sum_logits": -1.1986253261566162, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.1986253261566162, "logits_per_char": -0.5993126630783081, "num_chars": 2}, {"sum_logits": -2.0621540546417236, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -2.0621540546417236, "logits_per_char": -1.0310770273208618, "num_chars": 2}, {"sum_logits": -1.9142354726791382, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.9142354726791382, "logits_per_char": -0.9571177363395691, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 154, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9548318386077881, "incorrect_loss_raw": 1.64439860979716, "correct_loss_per_char": 0.47741591930389404, "incorrect_loss_per_char": 0.82219930489858, "correct_loss_per_token": 0.9548318386077881, "incorrect_loss_per_token": 1.64439860979716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9548318386077881, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -0.9548318386077881, "logits_per_char": -0.47741591930389404, "num_chars": 2}, {"sum_logits": -1.2736964225769043, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.2736964225769043, "logits_per_char": -0.6368482112884521, "num_chars": 2}, {"sum_logits": -2.0026352405548096, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -2.0026352405548096, "logits_per_char": -1.0013176202774048, "num_chars": 2}, {"sum_logits": -1.6568641662597656, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.6568641662597656, "logits_per_char": -0.8284320831298828, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 155, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3827134370803833, "incorrect_loss_raw": 1.4295334021250408, "correct_loss_per_char": 0.6913567185401917, "incorrect_loss_per_char": 0.7147667010625204, "correct_loss_per_token": 1.3827134370803833, "incorrect_loss_per_token": 1.4295334021250408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.079648494720459, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.079648494720459, "logits_per_char": -0.5398242473602295, "num_chars": 2}, {"sum_logits": -1.3827134370803833, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3827134370803833, "logits_per_char": -0.6913567185401917, "num_chars": 2}, {"sum_logits": -1.6856719255447388, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6856719255447388, "logits_per_char": -0.8428359627723694, "num_chars": 2}, {"sum_logits": -1.5232797861099243, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5232797861099243, "logits_per_char": -0.7616398930549622, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 156, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5042129755020142, "incorrect_loss_raw": 1.403734842936198, "correct_loss_per_char": 0.7521064877510071, "incorrect_loss_per_char": 0.701867421468099, "correct_loss_per_token": 1.5042129755020142, "incorrect_loss_per_token": 1.403734842936198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0305352210998535, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -1.0305352210998535, "logits_per_char": -0.5152676105499268, "num_chars": 2}, {"sum_logits": -1.5042129755020142, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.5042129755020142, "logits_per_char": -0.7521064877510071, "num_chars": 2}, {"sum_logits": -1.7611322402954102, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.7611322402954102, "logits_per_char": -0.8805661201477051, "num_chars": 2}, {"sum_logits": -1.41953706741333, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.41953706741333, "logits_per_char": -0.709768533706665, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 157, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5276596546173096, "incorrect_loss_raw": 1.3704527219136555, "correct_loss_per_char": 0.7638298273086548, "incorrect_loss_per_char": 0.6852263609568278, "correct_loss_per_token": 1.5276596546173096, "incorrect_loss_per_token": 1.3704527219136555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2941923141479492, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.2941923141479492, "logits_per_char": -0.6470961570739746, "num_chars": 2}, {"sum_logits": -1.1820502281188965, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.1820502281188965, "logits_per_char": -0.5910251140594482, "num_chars": 2}, {"sum_logits": -1.635115623474121, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.635115623474121, "logits_per_char": -0.8175578117370605, "num_chars": 2}, {"sum_logits": -1.5276596546173096, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5276596546173096, "logits_per_char": -0.7638298273086548, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 158, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.763907551765442, "incorrect_loss_raw": 1.311307470003764, "correct_loss_per_char": 0.881953775882721, "incorrect_loss_per_char": 0.655653735001882, "correct_loss_per_token": 1.763907551765442, "incorrect_loss_per_token": 1.311307470003764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.128436803817749, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -1.128436803817749, "logits_per_char": -0.5642184019088745, "num_chars": 2}, {"sum_logits": -1.2536592483520508, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.2536592483520508, "logits_per_char": -0.6268296241760254, "num_chars": 2}, {"sum_logits": -1.763907551765442, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.763907551765442, "logits_per_char": -0.881953775882721, "num_chars": 2}, {"sum_logits": -1.5518263578414917, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.5518263578414917, "logits_per_char": -0.7759131789207458, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 159, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7521252632141113, "incorrect_loss_raw": 1.3087306022644043, "correct_loss_per_char": 0.8760626316070557, "incorrect_loss_per_char": 0.6543653011322021, "correct_loss_per_token": 1.7521252632141113, "incorrect_loss_per_token": 1.3087306022644043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2377046346664429, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.2377046346664429, "logits_per_char": -0.6188523173332214, "num_chars": 2}, {"sum_logits": -1.2120916843414307, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -1.2120916843414307, "logits_per_char": -0.6060458421707153, "num_chars": 2}, {"sum_logits": -1.7521252632141113, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.7521252632141113, "logits_per_char": -0.8760626316070557, "num_chars": 2}, {"sum_logits": -1.4763954877853394, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.4763954877853394, "logits_per_char": -0.7381977438926697, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 160, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.199722409248352, "incorrect_loss_raw": 1.5323663155237834, "correct_loss_per_char": 0.599861204624176, "incorrect_loss_per_char": 0.7661831577618917, "correct_loss_per_token": 1.199722409248352, "incorrect_loss_per_token": 1.5323663155237834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0816903114318848, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0816903114318848, "logits_per_char": -0.5408451557159424, "num_chars": 2}, {"sum_logits": -1.199722409248352, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.199722409248352, "logits_per_char": -0.599861204624176, "num_chars": 2}, {"sum_logits": -1.9164111614227295, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9164111614227295, "logits_per_char": -0.9582055807113647, "num_chars": 2}, {"sum_logits": -1.5989974737167358, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5989974737167358, "logits_per_char": -0.7994987368583679, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 161, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4069873094558716, "incorrect_loss_raw": 1.4109203815460205, "correct_loss_per_char": 0.7034936547279358, "incorrect_loss_per_char": 0.7054601907730103, "correct_loss_per_token": 1.4069873094558716, "incorrect_loss_per_token": 1.4109203815460205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1560791730880737, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1560791730880737, "logits_per_char": -0.5780395865440369, "num_chars": 2}, {"sum_logits": -1.415036678314209, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.415036678314209, "logits_per_char": -0.7075183391571045, "num_chars": 2}, {"sum_logits": -1.6616452932357788, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6616452932357788, "logits_per_char": -0.8308226466178894, "num_chars": 2}, {"sum_logits": -1.4069873094558716, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4069873094558716, "logits_per_char": -0.7034936547279358, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 162, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7843502759933472, "incorrect_loss_raw": 1.3049245278040569, "correct_loss_per_char": 0.8921751379966736, "incorrect_loss_per_char": 0.6524622639020284, "correct_loss_per_token": 1.7843502759933472, "incorrect_loss_per_token": 1.3049245278040569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.139456033706665, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -1.139456033706665, "logits_per_char": -0.5697280168533325, "num_chars": 2}, {"sum_logits": -1.249021053314209, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.249021053314209, "logits_per_char": -0.6245105266571045, "num_chars": 2}, {"sum_logits": -1.7843502759933472, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.7843502759933472, "logits_per_char": -0.8921751379966736, "num_chars": 2}, {"sum_logits": -1.5262964963912964, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.5262964963912964, "logits_per_char": -0.7631482481956482, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 163, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7287601232528687, "incorrect_loss_raw": 1.3100159168243408, "correct_loss_per_char": 0.8643800616264343, "incorrect_loss_per_char": 0.6550079584121704, "correct_loss_per_token": 1.7287601232528687, "incorrect_loss_per_token": 1.3100159168243408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1374419927597046, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -1.1374419927597046, "logits_per_char": -0.5687209963798523, "num_chars": 2}, {"sum_logits": -1.3999178409576416, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.3999178409576416, "logits_per_char": -0.6999589204788208, "num_chars": 2}, {"sum_logits": -1.7287601232528687, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.7287601232528687, "logits_per_char": -0.8643800616264343, "num_chars": 2}, {"sum_logits": -1.3926879167556763, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.3926879167556763, "logits_per_char": -0.6963439583778381, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 164, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5188614130020142, "incorrect_loss_raw": 1.3815287748972576, "correct_loss_per_char": 0.7594307065010071, "incorrect_loss_per_char": 0.6907643874486288, "correct_loss_per_token": 1.5188614130020142, "incorrect_loss_per_token": 1.3815287748972576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1510560512542725, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.1510560512542725, "logits_per_char": -0.5755280256271362, "num_chars": 2}, {"sum_logits": -1.296671748161316, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.296671748161316, "logits_per_char": -0.648335874080658, "num_chars": 2}, {"sum_logits": -1.696858525276184, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.696858525276184, "logits_per_char": -0.848429262638092, "num_chars": 2}, {"sum_logits": -1.5188614130020142, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5188614130020142, "logits_per_char": -0.7594307065010071, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 165, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3940620422363281, "incorrect_loss_raw": 1.4146576722462971, "correct_loss_per_char": 0.6970310211181641, "incorrect_loss_per_char": 0.7073288361231486, "correct_loss_per_token": 1.3940620422363281, "incorrect_loss_per_token": 1.4146576722462971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1464499235153198, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -1.1464499235153198, "logits_per_char": -0.5732249617576599, "num_chars": 2}, {"sum_logits": -1.3940620422363281, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.3940620422363281, "logits_per_char": -0.6970310211181641, "num_chars": 2}, {"sum_logits": -1.6425073146820068, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.6425073146820068, "logits_per_char": -0.8212536573410034, "num_chars": 2}, {"sum_logits": -1.455015778541565, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.455015778541565, "logits_per_char": -0.7275078892707825, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 166, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1767278909683228, "incorrect_loss_raw": 1.4826212326685588, "correct_loss_per_char": 0.5883639454841614, "incorrect_loss_per_char": 0.7413106163342794, "correct_loss_per_token": 1.1767278909683228, "incorrect_loss_per_token": 1.4826212326685588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3365178108215332, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.3365178108215332, "logits_per_char": -0.6682589054107666, "num_chars": 2}, {"sum_logits": -1.1767278909683228, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -1.1767278909683228, "logits_per_char": -0.5883639454841614, "num_chars": 2}, {"sum_logits": -1.6354161500930786, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.6354161500930786, "logits_per_char": -0.8177080750465393, "num_chars": 2}, {"sum_logits": -1.4759297370910645, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.4759297370910645, "logits_per_char": -0.7379648685455322, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 167, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.180795431137085, "incorrect_loss_raw": 1.4838673273722331, "correct_loss_per_char": 0.5903977155685425, "incorrect_loss_per_char": 0.7419336636861166, "correct_loss_per_token": 1.180795431137085, "incorrect_loss_per_token": 1.4838673273722331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.180795431137085, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.180795431137085, "logits_per_char": -0.5903977155685425, "num_chars": 2}, {"sum_logits": -1.3949086666107178, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3949086666107178, "logits_per_char": -0.6974543333053589, "num_chars": 2}, {"sum_logits": -1.666831612586975, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.666831612586975, "logits_per_char": -0.8334158062934875, "num_chars": 2}, {"sum_logits": -1.3898617029190063, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3898617029190063, "logits_per_char": -0.6949308514595032, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 168, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1832003593444824, "incorrect_loss_raw": 1.6044011116027832, "correct_loss_per_char": 0.5916001796722412, "incorrect_loss_per_char": 0.8022005558013916, "correct_loss_per_token": 1.1832003593444824, "incorrect_loss_per_token": 1.6044011116027832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9180293083190918, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.9180293083190918, "logits_per_char": -0.4590146541595459, "num_chars": 2}, {"sum_logits": -1.1832003593444824, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.1832003593444824, "logits_per_char": -0.5916001796722412, "num_chars": 2}, {"sum_logits": -2.0863723754882812, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -2.0863723754882812, "logits_per_char": -1.0431861877441406, "num_chars": 2}, {"sum_logits": -1.8088016510009766, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.8088016510009766, "logits_per_char": -0.9044008255004883, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 169, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.242591142654419, "incorrect_loss_raw": 1.4797813892364502, "correct_loss_per_char": 0.6212955713272095, "incorrect_loss_per_char": 0.7398906946182251, "correct_loss_per_token": 1.242591142654419, "incorrect_loss_per_token": 1.4797813892364502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.242591142654419, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -1.242591142654419, "logits_per_char": -0.6212955713272095, "num_chars": 2}, {"sum_logits": -1.3005743026733398, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.3005743026733398, "logits_per_char": -0.6502871513366699, "num_chars": 2}, {"sum_logits": -1.842213749885559, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.842213749885559, "logits_per_char": -0.9211068749427795, "num_chars": 2}, {"sum_logits": -1.2965561151504517, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.2965561151504517, "logits_per_char": -0.6482780575752258, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 170, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3012421131134033, "incorrect_loss_raw": 1.5150044759114583, "correct_loss_per_char": 0.6506210565567017, "incorrect_loss_per_char": 0.7575022379557291, "correct_loss_per_token": 1.3012421131134033, "incorrect_loss_per_token": 1.5150044759114583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9512784481048584, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -0.9512784481048584, "logits_per_char": -0.4756392240524292, "num_chars": 2}, {"sum_logits": -1.3012421131134033, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.3012421131134033, "logits_per_char": -0.6506210565567017, "num_chars": 2}, {"sum_logits": -1.908449649810791, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.908449649810791, "logits_per_char": -0.9542248249053955, "num_chars": 2}, {"sum_logits": -1.6852853298187256, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.6852853298187256, "logits_per_char": -0.8426426649093628, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 171, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1457723379135132, "incorrect_loss_raw": 1.5505409638086955, "correct_loss_per_char": 0.5728861689567566, "incorrect_loss_per_char": 0.7752704819043478, "correct_loss_per_token": 1.1457723379135132, "incorrect_loss_per_token": 1.5505409638086955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1457723379135132, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -1.1457723379135132, "logits_per_char": -0.5728861689567566, "num_chars": 2}, {"sum_logits": -1.2090961933135986, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.2090961933135986, "logits_per_char": -0.6045480966567993, "num_chars": 2}, {"sum_logits": -2.015434741973877, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -2.015434741973877, "logits_per_char": -1.0077173709869385, "num_chars": 2}, {"sum_logits": -1.4270919561386108, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.4270919561386108, "logits_per_char": -0.7135459780693054, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 172, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6556216478347778, "incorrect_loss_raw": 1.3943523168563843, "correct_loss_per_char": 0.8278108239173889, "incorrect_loss_per_char": 0.6971761584281921, "correct_loss_per_token": 1.6556216478347778, "incorrect_loss_per_token": 1.3943523168563843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0109204053878784, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0109204053878784, "logits_per_char": -0.5054602026939392, "num_chars": 2}, {"sum_logits": -1.2185858488082886, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.2185858488082886, "logits_per_char": -0.6092929244041443, "num_chars": 2}, {"sum_logits": -1.9535506963729858, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.9535506963729858, "logits_per_char": -0.9767753481864929, "num_chars": 2}, {"sum_logits": -1.6556216478347778, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6556216478347778, "logits_per_char": -0.8278108239173889, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 173, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5114859342575073, "incorrect_loss_raw": 1.3972136974334717, "correct_loss_per_char": 0.7557429671287537, "incorrect_loss_per_char": 0.6986068487167358, "correct_loss_per_token": 1.5114859342575073, "incorrect_loss_per_token": 1.3972136974334717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.153861403465271, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.153861403465271, "logits_per_char": -0.5769307017326355, "num_chars": 2}, {"sum_logits": -1.2341208457946777, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.2341208457946777, "logits_per_char": -0.6170604228973389, "num_chars": 2}, {"sum_logits": -1.8036588430404663, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.8036588430404663, "logits_per_char": -0.9018294215202332, "num_chars": 2}, {"sum_logits": -1.5114859342575073, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.5114859342575073, "logits_per_char": -0.7557429671287537, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 174, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.081949234008789, "incorrect_loss_raw": 1.5838863849639893, "correct_loss_per_char": 0.5409746170043945, "incorrect_loss_per_char": 0.7919431924819946, "correct_loss_per_token": 1.081949234008789, "incorrect_loss_per_token": 1.5838863849639893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.081949234008789, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -1.081949234008789, "logits_per_char": -0.5409746170043945, "num_chars": 2}, {"sum_logits": -1.1530234813690186, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.1530234813690186, "logits_per_char": -0.5765117406845093, "num_chars": 2}, {"sum_logits": -1.9576940536499023, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.9576940536499023, "logits_per_char": -0.9788470268249512, "num_chars": 2}, {"sum_logits": -1.6409416198730469, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.6409416198730469, "logits_per_char": -0.8204708099365234, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 175, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1841914653778076, "incorrect_loss_raw": 1.5741699536641438, "correct_loss_per_char": 0.5920957326889038, "incorrect_loss_per_char": 0.7870849768320719, "correct_loss_per_token": 1.1841914653778076, "incorrect_loss_per_token": 1.5741699536641438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9920530319213867, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -0.9920530319213867, "logits_per_char": -0.49602651596069336, "num_chars": 2}, {"sum_logits": -1.1841914653778076, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.1841914653778076, "logits_per_char": -0.5920957326889038, "num_chars": 2}, {"sum_logits": -2.042240619659424, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -2.042240619659424, "logits_per_char": -1.021120309829712, "num_chars": 2}, {"sum_logits": -1.688216209411621, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.688216209411621, "logits_per_char": -0.8441081047058105, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 176, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.194392204284668, "incorrect_loss_raw": 1.542338252067566, "correct_loss_per_char": 0.597196102142334, "incorrect_loss_per_char": 0.771169126033783, "correct_loss_per_token": 1.194392204284668, "incorrect_loss_per_token": 1.542338252067566, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0455600023269653, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.0455600023269653, "logits_per_char": -0.5227800011634827, "num_chars": 2}, {"sum_logits": -1.194392204284668, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.194392204284668, "logits_per_char": -0.597196102142334, "num_chars": 2}, {"sum_logits": -1.934572458267212, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.934572458267212, "logits_per_char": -0.967286229133606, "num_chars": 2}, {"sum_logits": -1.6468822956085205, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.6468822956085205, "logits_per_char": -0.8234411478042603, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 177, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.726629614830017, "incorrect_loss_raw": 1.324613094329834, "correct_loss_per_char": 0.8633148074150085, "incorrect_loss_per_char": 0.662306547164917, "correct_loss_per_token": 1.726629614830017, "incorrect_loss_per_token": 1.324613094329834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0879396200180054, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -1.0879396200180054, "logits_per_char": -0.5439698100090027, "num_chars": 2}, {"sum_logits": -1.2829192876815796, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.2829192876815796, "logits_per_char": -0.6414596438407898, "num_chars": 2}, {"sum_logits": -1.726629614830017, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.726629614830017, "logits_per_char": -0.8633148074150085, "num_chars": 2}, {"sum_logits": -1.602980375289917, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.602980375289917, "logits_per_char": -0.8014901876449585, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 178, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.642220139503479, "incorrect_loss_raw": 1.4067498842875164, "correct_loss_per_char": 0.8211100697517395, "incorrect_loss_per_char": 0.7033749421437582, "correct_loss_per_token": 1.642220139503479, "incorrect_loss_per_token": 1.4067498842875164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9758994579315186, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.9758994579315186, "logits_per_char": -0.4879497289657593, "num_chars": 2}, {"sum_logits": -1.2569376230239868, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2569376230239868, "logits_per_char": -0.6284688115119934, "num_chars": 2}, {"sum_logits": -1.9874125719070435, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.9874125719070435, "logits_per_char": -0.9937062859535217, "num_chars": 2}, {"sum_logits": -1.642220139503479, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.642220139503479, "logits_per_char": -0.8211100697517395, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 179, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1624151468276978, "incorrect_loss_raw": 1.5363980929056804, "correct_loss_per_char": 0.5812075734138489, "incorrect_loss_per_char": 0.7681990464528402, "correct_loss_per_token": 1.1624151468276978, "incorrect_loss_per_token": 1.5363980929056804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.143235206604004, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.143235206604004, "logits_per_char": -0.571617603302002, "num_chars": 2}, {"sum_logits": -1.1624151468276978, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.1624151468276978, "logits_per_char": -0.5812075734138489, "num_chars": 2}, {"sum_logits": -1.934651494026184, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.934651494026184, "logits_per_char": -0.967325747013092, "num_chars": 2}, {"sum_logits": -1.531307578086853, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.531307578086853, "logits_per_char": -0.7656537890434265, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 180, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6544573307037354, "incorrect_loss_raw": 1.3965837955474854, "correct_loss_per_char": 0.8272286653518677, "incorrect_loss_per_char": 0.6982918977737427, "correct_loss_per_token": 1.6544573307037354, "incorrect_loss_per_token": 1.3965837955474854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1012861728668213, "num_tokens": 1, "num_tokens_all": 1143, "is_greedy": true, "logits_per_token": -1.1012861728668213, "logits_per_char": -0.5506430864334106, "num_chars": 2}, {"sum_logits": -1.1048623323440552, "num_tokens": 1, "num_tokens_all": 1143, "is_greedy": false, "logits_per_token": -1.1048623323440552, "logits_per_char": -0.5524311661720276, "num_chars": 2}, {"sum_logits": -1.9836028814315796, "num_tokens": 1, "num_tokens_all": 1143, "is_greedy": false, "logits_per_token": -1.9836028814315796, "logits_per_char": -0.9918014407157898, "num_chars": 2}, {"sum_logits": -1.6544573307037354, "num_tokens": 1, "num_tokens_all": 1143, "is_greedy": false, "logits_per_token": -1.6544573307037354, "logits_per_char": -0.8272286653518677, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 181, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0780565738677979, "incorrect_loss_raw": 1.5355982383092244, "correct_loss_per_char": 0.5390282869338989, "incorrect_loss_per_char": 0.7677991191546122, "correct_loss_per_token": 1.0780565738677979, "incorrect_loss_per_token": 1.5355982383092244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0780565738677979, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -1.0780565738677979, "logits_per_char": -0.5390282869338989, "num_chars": 2}, {"sum_logits": -1.4320896863937378, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.4320896863937378, "logits_per_char": -0.7160448431968689, "num_chars": 2}, {"sum_logits": -1.7447960376739502, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.7447960376739502, "logits_per_char": -0.8723980188369751, "num_chars": 2}, {"sum_logits": -1.4299089908599854, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.4299089908599854, "logits_per_char": -0.7149544954299927, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 182, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.937731385231018, "incorrect_loss_raw": 1.2984044154485066, "correct_loss_per_char": 0.968865692615509, "incorrect_loss_per_char": 0.6492022077242533, "correct_loss_per_token": 1.937731385231018, "incorrect_loss_per_token": 1.2984044154485066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0840401649475098, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": true, "logits_per_token": -1.0840401649475098, "logits_per_char": -0.5420200824737549, "num_chars": 2}, {"sum_logits": -1.1423462629318237, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.1423462629318237, "logits_per_char": -0.5711731314659119, "num_chars": 2}, {"sum_logits": -1.937731385231018, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.937731385231018, "logits_per_char": -0.968865692615509, "num_chars": 2}, {"sum_logits": -1.6688268184661865, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.6688268184661865, "logits_per_char": -0.8344134092330933, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 183, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2513885498046875, "incorrect_loss_raw": 1.4715856313705444, "correct_loss_per_char": 0.6256942749023438, "incorrect_loss_per_char": 0.7357928156852722, "correct_loss_per_token": 1.2513885498046875, "incorrect_loss_per_token": 1.4715856313705444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1673024892807007, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -1.1673024892807007, "logits_per_char": -0.5836512446403503, "num_chars": 2}, {"sum_logits": -1.2513885498046875, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.2513885498046875, "logits_per_char": -0.6256942749023438, "num_chars": 2}, {"sum_logits": -1.6713811159133911, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6713811159133911, "logits_per_char": -0.8356905579566956, "num_chars": 2}, {"sum_logits": -1.5760732889175415, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.5760732889175415, "logits_per_char": -0.7880366444587708, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 184, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5614701509475708, "incorrect_loss_raw": 1.3964908917744954, "correct_loss_per_char": 0.7807350754737854, "incorrect_loss_per_char": 0.6982454458872477, "correct_loss_per_token": 1.5614701509475708, "incorrect_loss_per_token": 1.3964908917744954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1839653253555298, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.1839653253555298, "logits_per_char": -0.5919826626777649, "num_chars": 2}, {"sum_logits": -1.1279746294021606, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.1279746294021606, "logits_per_char": -0.5639873147010803, "num_chars": 2}, {"sum_logits": -1.877532720565796, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.877532720565796, "logits_per_char": -0.938766360282898, "num_chars": 2}, {"sum_logits": -1.5614701509475708, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.5614701509475708, "logits_per_char": -0.7807350754737854, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 185, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9893558025360107, "incorrect_loss_raw": 1.2829114198684692, "correct_loss_per_char": 0.9946779012680054, "incorrect_loss_per_char": 0.6414557099342346, "correct_loss_per_token": 1.9893558025360107, "incorrect_loss_per_token": 1.2829114198684692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1020679473876953, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -1.1020679473876953, "logits_per_char": -0.5510339736938477, "num_chars": 2}, {"sum_logits": -1.109849214553833, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.109849214553833, "logits_per_char": -0.5549246072769165, "num_chars": 2}, {"sum_logits": -1.9893558025360107, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.9893558025360107, "logits_per_char": -0.9946779012680054, "num_chars": 2}, {"sum_logits": -1.6368170976638794, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.6368170976638794, "logits_per_char": -0.8184085488319397, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 186, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.950958251953125, "incorrect_loss_raw": 1.6598131656646729, "correct_loss_per_char": 0.4754791259765625, "incorrect_loss_per_char": 0.8299065828323364, "correct_loss_per_token": 0.950958251953125, "incorrect_loss_per_token": 1.6598131656646729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.950958251953125, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": true, "logits_per_token": -0.950958251953125, "logits_per_char": -0.4754791259765625, "num_chars": 2}, {"sum_logits": -1.2381317615509033, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.2381317615509033, "logits_per_char": -0.6190658807754517, "num_chars": 2}, {"sum_logits": -2.076056480407715, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -2.076056480407715, "logits_per_char": -1.0380282402038574, "num_chars": 2}, {"sum_logits": -1.6652512550354004, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.6652512550354004, "logits_per_char": -0.8326256275177002, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 187, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4975132942199707, "incorrect_loss_raw": 1.4123518864313762, "correct_loss_per_char": 0.7487566471099854, "incorrect_loss_per_char": 0.7061759432156881, "correct_loss_per_token": 1.4975132942199707, "incorrect_loss_per_token": 1.4123518864313762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0509363412857056, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0509363412857056, "logits_per_char": -0.5254681706428528, "num_chars": 2}, {"sum_logits": -1.4226257801055908, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.4226257801055908, "logits_per_char": -0.7113128900527954, "num_chars": 2}, {"sum_logits": -1.763493537902832, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.763493537902832, "logits_per_char": -0.881746768951416, "num_chars": 2}, {"sum_logits": -1.4975132942199707, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.4975132942199707, "logits_per_char": -0.7487566471099854, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 188, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.033510684967041, "incorrect_loss_raw": 1.617284099260966, "correct_loss_per_char": 0.5167553424835205, "incorrect_loss_per_char": 0.808642049630483, "correct_loss_per_token": 1.033510684967041, "incorrect_loss_per_token": 1.617284099260966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.033510684967041, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.033510684967041, "logits_per_char": -0.5167553424835205, "num_chars": 2}, {"sum_logits": -1.1602911949157715, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.1602911949157715, "logits_per_char": -0.5801455974578857, "num_chars": 2}, {"sum_logits": -2.04929256439209, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -2.04929256439209, "logits_per_char": -1.024646282196045, "num_chars": 2}, {"sum_logits": -1.6422685384750366, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.6422685384750366, "logits_per_char": -0.8211342692375183, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 189, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1169216632843018, "incorrect_loss_raw": 1.5401570796966553, "correct_loss_per_char": 0.5584608316421509, "incorrect_loss_per_char": 0.7700785398483276, "correct_loss_per_token": 1.1169216632843018, "incorrect_loss_per_token": 1.5401570796966553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1169216632843018, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1169216632843018, "logits_per_char": -0.5584608316421509, "num_chars": 2}, {"sum_logits": -1.2015403509140015, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2015403509140015, "logits_per_char": -0.6007701754570007, "num_chars": 2}, {"sum_logits": -1.830265760421753, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.830265760421753, "logits_per_char": -0.9151328802108765, "num_chars": 2}, {"sum_logits": -1.5886651277542114, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5886651277542114, "logits_per_char": -0.7943325638771057, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 190, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3657602071762085, "incorrect_loss_raw": 1.4366084337234497, "correct_loss_per_char": 0.6828801035881042, "incorrect_loss_per_char": 0.7183042168617249, "correct_loss_per_token": 1.3657602071762085, "incorrect_loss_per_token": 1.4366084337234497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1254644393920898, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -1.1254644393920898, "logits_per_char": -0.5627322196960449, "num_chars": 2}, {"sum_logits": -1.3657602071762085, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3657602071762085, "logits_per_char": -0.6828801035881042, "num_chars": 2}, {"sum_logits": -1.769249439239502, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.769249439239502, "logits_per_char": -0.884624719619751, "num_chars": 2}, {"sum_logits": -1.4151114225387573, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.4151114225387573, "logits_per_char": -0.7075557112693787, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 191, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8476828336715698, "incorrect_loss_raw": 1.2958598931630452, "correct_loss_per_char": 0.9238414168357849, "incorrect_loss_per_char": 0.6479299465815226, "correct_loss_per_token": 1.8476828336715698, "incorrect_loss_per_token": 1.2958598931630452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1297091245651245, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.1297091245651245, "logits_per_char": -0.5648545622825623, "num_chars": 2}, {"sum_logits": -1.211379051208496, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.211379051208496, "logits_per_char": -0.605689525604248, "num_chars": 2}, {"sum_logits": -1.8476828336715698, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.8476828336715698, "logits_per_char": -0.9238414168357849, "num_chars": 2}, {"sum_logits": -1.5464915037155151, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.5464915037155151, "logits_per_char": -0.7732457518577576, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 192, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.638315200805664, "incorrect_loss_raw": 1.4165499607721965, "correct_loss_per_char": 0.819157600402832, "incorrect_loss_per_char": 0.7082749803860983, "correct_loss_per_token": 1.638315200805664, "incorrect_loss_per_token": 1.4165499607721965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0785423517227173, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0785423517227173, "logits_per_char": -0.5392711758613586, "num_chars": 2}, {"sum_logits": -1.1141421794891357, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.1141421794891357, "logits_per_char": -0.5570710897445679, "num_chars": 2}, {"sum_logits": -2.0569653511047363, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -2.0569653511047363, "logits_per_char": -1.0284826755523682, "num_chars": 2}, {"sum_logits": -1.638315200805664, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.638315200805664, "logits_per_char": -0.819157600402832, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 193, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8234786987304688, "incorrect_loss_raw": 1.310355504353841, "correct_loss_per_char": 0.9117393493652344, "incorrect_loss_per_char": 0.6551777521769205, "correct_loss_per_token": 1.8234786987304688, "incorrect_loss_per_token": 1.310355504353841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0530622005462646, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.0530622005462646, "logits_per_char": -0.5265311002731323, "num_chars": 2}, {"sum_logits": -1.263390064239502, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.263390064239502, "logits_per_char": -0.631695032119751, "num_chars": 2}, {"sum_logits": -1.8234786987304688, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.8234786987304688, "logits_per_char": -0.9117393493652344, "num_chars": 2}, {"sum_logits": -1.6146142482757568, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.6146142482757568, "logits_per_char": -0.8073071241378784, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 194, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8185619115829468, "incorrect_loss_raw": 1.313118616739909, "correct_loss_per_char": 0.9092809557914734, "incorrect_loss_per_char": 0.6565593083699545, "correct_loss_per_token": 1.8185619115829468, "incorrect_loss_per_token": 1.313118616739909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1320565938949585, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -1.1320565938949585, "logits_per_char": -0.5660282969474792, "num_chars": 2}, {"sum_logits": -1.1472948789596558, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.1472948789596558, "logits_per_char": -0.5736474394798279, "num_chars": 2}, {"sum_logits": -1.8185619115829468, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.8185619115829468, "logits_per_char": -0.9092809557914734, "num_chars": 2}, {"sum_logits": -1.6600043773651123, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.6600043773651123, "logits_per_char": -0.8300021886825562, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 195, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54986572265625, "incorrect_loss_raw": 1.3984917799631755, "correct_loss_per_char": 0.774932861328125, "incorrect_loss_per_char": 0.6992458899815878, "correct_loss_per_token": 1.54986572265625, "incorrect_loss_per_token": 1.3984917799631755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0753400325775146, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0753400325775146, "logits_per_char": -0.5376700162887573, "num_chars": 2}, {"sum_logits": -1.2882328033447266, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.2882328033447266, "logits_per_char": -0.6441164016723633, "num_chars": 2}, {"sum_logits": -1.8319025039672852, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.8319025039672852, "logits_per_char": -0.9159512519836426, "num_chars": 2}, {"sum_logits": -1.54986572265625, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.54986572265625, "logits_per_char": -0.774932861328125, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 196, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6141252517700195, "incorrect_loss_raw": 1.412932276725769, "correct_loss_per_char": 0.8070626258850098, "incorrect_loss_per_char": 0.7064661383628845, "correct_loss_per_token": 1.6141252517700195, "incorrect_loss_per_token": 1.412932276725769, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0321849584579468, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.0321849584579468, "logits_per_char": -0.5160924792289734, "num_chars": 2}, {"sum_logits": -1.2060062885284424, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2060062885284424, "logits_per_char": -0.6030031442642212, "num_chars": 2}, {"sum_logits": -2.000605583190918, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -2.000605583190918, "logits_per_char": -1.000302791595459, "num_chars": 2}, {"sum_logits": -1.6141252517700195, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.6141252517700195, "logits_per_char": -0.8070626258850098, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 197, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7495795488357544, "incorrect_loss_raw": 1.3022994995117188, "correct_loss_per_char": 0.8747897744178772, "incorrect_loss_per_char": 0.6511497497558594, "correct_loss_per_token": 1.7495795488357544, "incorrect_loss_per_token": 1.3022994995117188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1899802684783936, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -1.1899802684783936, "logits_per_char": -0.5949901342391968, "num_chars": 2}, {"sum_logits": -1.2893685102462769, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.2893685102462769, "logits_per_char": -0.6446842551231384, "num_chars": 2}, {"sum_logits": -1.7495795488357544, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.7495795488357544, "logits_per_char": -0.8747897744178772, "num_chars": 2}, {"sum_logits": -1.4275497198104858, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.4275497198104858, "logits_per_char": -0.7137748599052429, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 198, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1345129013061523, "incorrect_loss_raw": 1.531334360440572, "correct_loss_per_char": 0.5672564506530762, "incorrect_loss_per_char": 0.765667180220286, "correct_loss_per_token": 1.1345129013061523, "incorrect_loss_per_token": 1.531334360440572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2283549308776855, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.2283549308776855, "logits_per_char": -0.6141774654388428, "num_chars": 2}, {"sum_logits": -1.1345129013061523, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.1345129013061523, "logits_per_char": -0.5672564506530762, "num_chars": 2}, {"sum_logits": -1.8588358163833618, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.8588358163833618, "logits_per_char": -0.9294179081916809, "num_chars": 2}, {"sum_logits": -1.506812334060669, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.506812334060669, "logits_per_char": -0.7534061670303345, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 199, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6467320919036865, "incorrect_loss_raw": 1.3792612552642822, "correct_loss_per_char": 0.8233660459518433, "incorrect_loss_per_char": 0.6896306276321411, "correct_loss_per_token": 1.6467320919036865, "incorrect_loss_per_token": 1.3792612552642822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0722157955169678, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.0722157955169678, "logits_per_char": -0.5361078977584839, "num_chars": 2}, {"sum_logits": -1.2060626745224, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.2060626745224, "logits_per_char": -0.6030313372612, "num_chars": 2}, {"sum_logits": -1.859505295753479, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.859505295753479, "logits_per_char": -0.9297526478767395, "num_chars": 2}, {"sum_logits": -1.6467320919036865, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.6467320919036865, "logits_per_char": -0.8233660459518433, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 200, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1168146133422852, "incorrect_loss_raw": 1.5375666618347168, "correct_loss_per_char": 0.5584073066711426, "incorrect_loss_per_char": 0.7687833309173584, "correct_loss_per_token": 1.1168146133422852, "incorrect_loss_per_token": 1.5375666618347168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1168146133422852, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.1168146133422852, "logits_per_char": -0.5584073066711426, "num_chars": 2}, {"sum_logits": -1.2136259078979492, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2136259078979492, "logits_per_char": -0.6068129539489746, "num_chars": 2}, {"sum_logits": -1.7573418617248535, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.7573418617248535, "logits_per_char": -0.8786709308624268, "num_chars": 2}, {"sum_logits": -1.6417322158813477, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.6417322158813477, "logits_per_char": -0.8208661079406738, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 201, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.185725212097168, "incorrect_loss_raw": 1.5089768568674724, "correct_loss_per_char": 0.592862606048584, "incorrect_loss_per_char": 0.7544884284337362, "correct_loss_per_token": 1.185725212097168, "incorrect_loss_per_token": 1.5089768568674724, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.185725212097168, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.185725212097168, "logits_per_char": -0.592862606048584, "num_chars": 2}, {"sum_logits": -1.203389048576355, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.203389048576355, "logits_per_char": -0.6016945242881775, "num_chars": 2}, {"sum_logits": -1.8683751821517944, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.8683751821517944, "logits_per_char": -0.9341875910758972, "num_chars": 2}, {"sum_logits": -1.4551663398742676, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.4551663398742676, "logits_per_char": -0.7275831699371338, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 202, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.681657314300537, "incorrect_loss_raw": 1.325281023979187, "correct_loss_per_char": 0.8408286571502686, "incorrect_loss_per_char": 0.6626405119895935, "correct_loss_per_token": 1.681657314300537, "incorrect_loss_per_token": 1.325281023979187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3844077587127686, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.3844077587127686, "logits_per_char": -0.6922038793563843, "num_chars": 2}, {"sum_logits": -1.1154834032058716, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -1.1154834032058716, "logits_per_char": -0.5577417016029358, "num_chars": 2}, {"sum_logits": -1.681657314300537, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.681657314300537, "logits_per_char": -0.8408286571502686, "num_chars": 2}, {"sum_logits": -1.475951910018921, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.475951910018921, "logits_per_char": -0.7379759550094604, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 203, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0600805282592773, "incorrect_loss_raw": 1.293401837348938, "correct_loss_per_char": 1.0300402641296387, "incorrect_loss_per_char": 0.646700918674469, "correct_loss_per_token": 2.0600805282592773, "incorrect_loss_per_token": 1.293401837348938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.050269365310669, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.050269365310669, "logits_per_char": -0.5251346826553345, "num_chars": 2}, {"sum_logits": -1.083984375, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.083984375, "logits_per_char": -0.5419921875, "num_chars": 2}, {"sum_logits": -2.0600805282592773, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -2.0600805282592773, "logits_per_char": -1.0300402641296387, "num_chars": 2}, {"sum_logits": -1.745951771736145, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.745951771736145, "logits_per_char": -0.8729758858680725, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 204, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.260546088218689, "incorrect_loss_raw": 1.4980335235595703, "correct_loss_per_char": 0.6302730441093445, "incorrect_loss_per_char": 0.7490167617797852, "correct_loss_per_token": 1.260546088218689, "incorrect_loss_per_token": 1.4980335235595703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0396672487258911, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -1.0396672487258911, "logits_per_char": -0.5198336243629456, "num_chars": 2}, {"sum_logits": -1.260546088218689, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.260546088218689, "logits_per_char": -0.6302730441093445, "num_chars": 2}, {"sum_logits": -1.7766971588134766, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.7766971588134766, "logits_per_char": -0.8883485794067383, "num_chars": 2}, {"sum_logits": -1.6777361631393433, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.6777361631393433, "logits_per_char": -0.8388680815696716, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 205, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6939854621887207, "incorrect_loss_raw": 1.3125391801198323, "correct_loss_per_char": 0.8469927310943604, "incorrect_loss_per_char": 0.6562695900599161, "correct_loss_per_token": 1.6939854621887207, "incorrect_loss_per_token": 1.3125391801198323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.296189785003662, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.296189785003662, "logits_per_char": -0.648094892501831, "num_chars": 2}, {"sum_logits": -1.2823138236999512, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -1.2823138236999512, "logits_per_char": -0.6411569118499756, "num_chars": 2}, {"sum_logits": -1.6939854621887207, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6939854621887207, "logits_per_char": -0.8469927310943604, "num_chars": 2}, {"sum_logits": -1.3591139316558838, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.3591139316558838, "logits_per_char": -0.6795569658279419, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 206, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6564980745315552, "incorrect_loss_raw": 1.3309472799301147, "correct_loss_per_char": 0.8282490372657776, "incorrect_loss_per_char": 0.6654736399650574, "correct_loss_per_token": 1.6564980745315552, "incorrect_loss_per_token": 1.3309472799301147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1345181465148926, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.1345181465148926, "logits_per_char": -0.5672590732574463, "num_chars": 2}, {"sum_logits": -1.4786722660064697, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.4786722660064697, "logits_per_char": -0.7393361330032349, "num_chars": 2}, {"sum_logits": -1.6564980745315552, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6564980745315552, "logits_per_char": -0.8282490372657776, "num_chars": 2}, {"sum_logits": -1.379651427268982, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.379651427268982, "logits_per_char": -0.689825713634491, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 207, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4678471088409424, "incorrect_loss_raw": 1.4114714860916138, "correct_loss_per_char": 0.7339235544204712, "incorrect_loss_per_char": 0.7057357430458069, "correct_loss_per_token": 1.4678471088409424, "incorrect_loss_per_token": 1.4114714860916138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1871956586837769, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -1.1871956586837769, "logits_per_char": -0.5935978293418884, "num_chars": 2}, {"sum_logits": -1.22052800655365, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.22052800655365, "logits_per_char": -0.610264003276825, "num_chars": 2}, {"sum_logits": -1.8266907930374146, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.8266907930374146, "logits_per_char": -0.9133453965187073, "num_chars": 2}, {"sum_logits": -1.4678471088409424, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.4678471088409424, "logits_per_char": -0.7339235544204712, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 208, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0323574542999268, "incorrect_loss_raw": 1.2666741212209065, "correct_loss_per_char": 1.0161787271499634, "incorrect_loss_per_char": 0.6333370606104533, "correct_loss_per_token": 2.0323574542999268, "incorrect_loss_per_token": 1.2666741212209065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0601319074630737, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.0601319074630737, "logits_per_char": -0.5300659537315369, "num_chars": 2}, {"sum_logits": -1.2310503721237183, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.2310503721237183, "logits_per_char": -0.6155251860618591, "num_chars": 2}, {"sum_logits": -2.0323574542999268, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -2.0323574542999268, "logits_per_char": -1.0161787271499634, "num_chars": 2}, {"sum_logits": -1.5088400840759277, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.5088400840759277, "logits_per_char": -0.7544200420379639, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 209, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.137577772140503, "incorrect_loss_raw": 1.5944043000539143, "correct_loss_per_char": 0.5687888860702515, "incorrect_loss_per_char": 0.7972021500269572, "correct_loss_per_token": 1.137577772140503, "incorrect_loss_per_token": 1.5944043000539143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0222370624542236, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.0222370624542236, "logits_per_char": -0.5111185312271118, "num_chars": 2}, {"sum_logits": -1.137577772140503, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.137577772140503, "logits_per_char": -0.5687888860702515, "num_chars": 2}, {"sum_logits": -2.0221893787384033, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -2.0221893787384033, "logits_per_char": -1.0110946893692017, "num_chars": 2}, {"sum_logits": -1.7387864589691162, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.7387864589691162, "logits_per_char": -0.8693932294845581, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 210, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2581801414489746, "incorrect_loss_raw": 1.4838383595148723, "correct_loss_per_char": 0.6290900707244873, "incorrect_loss_per_char": 0.7419191797574362, "correct_loss_per_token": 1.2581801414489746, "incorrect_loss_per_token": 1.4838383595148723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1499443054199219, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.1499443054199219, "logits_per_char": -0.5749721527099609, "num_chars": 2}, {"sum_logits": -1.2581801414489746, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.2581801414489746, "logits_per_char": -0.6290900707244873, "num_chars": 2}, {"sum_logits": -1.7992950677871704, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.7992950677871704, "logits_per_char": -0.8996475338935852, "num_chars": 2}, {"sum_logits": -1.5022757053375244, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.5022757053375244, "logits_per_char": -0.7511378526687622, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 211, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1134907007217407, "incorrect_loss_raw": 1.534751534461975, "correct_loss_per_char": 0.5567453503608704, "incorrect_loss_per_char": 0.7673757672309875, "correct_loss_per_token": 1.1134907007217407, "incorrect_loss_per_token": 1.534751534461975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1134907007217407, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.1134907007217407, "logits_per_char": -0.5567453503608704, "num_chars": 2}, {"sum_logits": -1.234352707862854, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.234352707862854, "logits_per_char": -0.617176353931427, "num_chars": 2}, {"sum_logits": -1.7838317155838013, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.7838317155838013, "logits_per_char": -0.8919158577919006, "num_chars": 2}, {"sum_logits": -1.58607017993927, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.58607017993927, "logits_per_char": -0.793035089969635, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 212, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.393984317779541, "incorrect_loss_raw": 1.4089711904525757, "correct_loss_per_char": 0.6969921588897705, "incorrect_loss_per_char": 0.7044855952262878, "correct_loss_per_token": 1.393984317779541, "incorrect_loss_per_token": 1.4089711904525757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1870970726013184, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -1.1870970726013184, "logits_per_char": -0.5935485363006592, "num_chars": 2}, {"sum_logits": -1.4071942567825317, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.4071942567825317, "logits_per_char": -0.7035971283912659, "num_chars": 2}, {"sum_logits": -1.632622241973877, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.632622241973877, "logits_per_char": -0.8163111209869385, "num_chars": 2}, {"sum_logits": -1.393984317779541, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.393984317779541, "logits_per_char": -0.6969921588897705, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 213, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1014021635055542, "incorrect_loss_raw": 1.5529802242914836, "correct_loss_per_char": 0.5507010817527771, "incorrect_loss_per_char": 0.7764901121457418, "correct_loss_per_token": 1.1014021635055542, "incorrect_loss_per_token": 1.5529802242914836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1014021635055542, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.1014021635055542, "logits_per_char": -0.5507010817527771, "num_chars": 2}, {"sum_logits": -1.200583815574646, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.200583815574646, "logits_per_char": -0.600291907787323, "num_chars": 2}, {"sum_logits": -1.8572477102279663, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.8572477102279663, "logits_per_char": -0.9286238551139832, "num_chars": 2}, {"sum_logits": -1.6011091470718384, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.6011091470718384, "logits_per_char": -0.8005545735359192, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 214, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.847259759902954, "incorrect_loss_raw": 1.3028268416722615, "correct_loss_per_char": 0.923629879951477, "incorrect_loss_per_char": 0.6514134208361307, "correct_loss_per_token": 1.847259759902954, "incorrect_loss_per_token": 1.3028268416722615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0776735544204712, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.0776735544204712, "logits_per_char": -0.5388367772102356, "num_chars": 2}, {"sum_logits": -1.2676668167114258, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2676668167114258, "logits_per_char": -0.6338334083557129, "num_chars": 2}, {"sum_logits": -1.847259759902954, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.847259759902954, "logits_per_char": -0.923629879951477, "num_chars": 2}, {"sum_logits": -1.5631401538848877, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5631401538848877, "logits_per_char": -0.7815700769424438, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 215, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8726108074188232, "incorrect_loss_raw": 1.2882037957509358, "correct_loss_per_char": 0.9363054037094116, "incorrect_loss_per_char": 0.6441018978754679, "correct_loss_per_token": 1.8726108074188232, "incorrect_loss_per_token": 1.2882037957509358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1741156578063965, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.1741156578063965, "logits_per_char": -0.5870578289031982, "num_chars": 2}, {"sum_logits": -1.1710437536239624, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": true, "logits_per_token": -1.1710437536239624, "logits_per_char": -0.5855218768119812, "num_chars": 2}, {"sum_logits": -1.8726108074188232, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.8726108074188232, "logits_per_char": -0.9363054037094116, "num_chars": 2}, {"sum_logits": -1.5194519758224487, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.5194519758224487, "logits_per_char": -0.7597259879112244, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 216, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0315728187561035, "incorrect_loss_raw": 1.2808903455734253, "correct_loss_per_char": 1.0157864093780518, "incorrect_loss_per_char": 0.6404451727867126, "correct_loss_per_token": 2.0315728187561035, "incorrect_loss_per_token": 1.2808903455734253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0002729892730713, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.0002729892730713, "logits_per_char": -0.5001364946365356, "num_chars": 2}, {"sum_logits": -1.2116860151290894, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2116860151290894, "logits_per_char": -0.6058430075645447, "num_chars": 2}, {"sum_logits": -2.0315728187561035, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -2.0315728187561035, "logits_per_char": -1.0157864093780518, "num_chars": 2}, {"sum_logits": -1.6307120323181152, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.6307120323181152, "logits_per_char": -0.8153560161590576, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 217, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9983675479888916, "incorrect_loss_raw": 1.278800368309021, "correct_loss_per_char": 0.9991837739944458, "incorrect_loss_per_char": 0.6394001841545105, "correct_loss_per_token": 1.9983675479888916, "incorrect_loss_per_token": 1.278800368309021, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0618311166763306, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.0618311166763306, "logits_per_char": -0.5309155583381653, "num_chars": 2}, {"sum_logits": -1.1789922714233398, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.1789922714233398, "logits_per_char": -0.5894961357116699, "num_chars": 2}, {"sum_logits": -1.9983675479888916, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.9983675479888916, "logits_per_char": -0.9991837739944458, "num_chars": 2}, {"sum_logits": -1.5955777168273926, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5955777168273926, "logits_per_char": -0.7977888584136963, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 218, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6185564994812012, "incorrect_loss_raw": 1.418580452601115, "correct_loss_per_char": 0.8092782497406006, "incorrect_loss_per_char": 0.7092902263005575, "correct_loss_per_token": 1.6185564994812012, "incorrect_loss_per_token": 1.418580452601115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1162524223327637, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.1162524223327637, "logits_per_char": -0.5581262111663818, "num_chars": 2}, {"sum_logits": -1.0862236022949219, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": true, "logits_per_token": -1.0862236022949219, "logits_per_char": -0.5431118011474609, "num_chars": 2}, {"sum_logits": -2.053265333175659, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -2.053265333175659, "logits_per_char": -1.0266326665878296, "num_chars": 2}, {"sum_logits": -1.6185564994812012, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.6185564994812012, "logits_per_char": -0.8092782497406006, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 219, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7093859910964966, "incorrect_loss_raw": 1.3686268329620361, "correct_loss_per_char": 0.8546929955482483, "incorrect_loss_per_char": 0.6843134164810181, "correct_loss_per_token": 1.7093859910964966, "incorrect_loss_per_token": 1.3686268329620361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0033913850784302, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -1.0033913850784302, "logits_per_char": -0.5016956925392151, "num_chars": 2}, {"sum_logits": -1.2449564933776855, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.2449564933776855, "logits_per_char": -0.6224782466888428, "num_chars": 2}, {"sum_logits": -1.8575326204299927, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.8575326204299927, "logits_per_char": -0.9287663102149963, "num_chars": 2}, {"sum_logits": -1.7093859910964966, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.7093859910964966, "logits_per_char": -0.8546929955482483, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 220, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1637301445007324, "incorrect_loss_raw": 1.5185742775599163, "correct_loss_per_char": 0.5818650722503662, "incorrect_loss_per_char": 0.7592871387799581, "correct_loss_per_token": 1.1637301445007324, "incorrect_loss_per_token": 1.5185742775599163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1637301445007324, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.1637301445007324, "logits_per_char": -0.5818650722503662, "num_chars": 2}, {"sum_logits": -1.1727055311203003, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.1727055311203003, "logits_per_char": -0.5863527655601501, "num_chars": 2}, {"sum_logits": -1.8112003803253174, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.8112003803253174, "logits_per_char": -0.9056001901626587, "num_chars": 2}, {"sum_logits": -1.5718169212341309, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5718169212341309, "logits_per_char": -0.7859084606170654, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 221, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5952486991882324, "incorrect_loss_raw": 1.354321002960205, "correct_loss_per_char": 0.7976243495941162, "incorrect_loss_per_char": 0.6771605014801025, "correct_loss_per_token": 1.5952486991882324, "incorrect_loss_per_token": 1.354321002960205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2045834064483643, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": true, "logits_per_token": -1.2045834064483643, "logits_per_char": -0.6022917032241821, "num_chars": 2}, {"sum_logits": -1.2396655082702637, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.2396655082702637, "logits_per_char": -0.6198327541351318, "num_chars": 2}, {"sum_logits": -1.6187140941619873, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.6187140941619873, "logits_per_char": -0.8093570470809937, "num_chars": 2}, {"sum_logits": -1.5952486991882324, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.5952486991882324, "logits_per_char": -0.7976243495941162, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 222, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.937620997428894, "incorrect_loss_raw": 1.2874727646509807, "correct_loss_per_char": 0.968810498714447, "incorrect_loss_per_char": 0.6437363823254904, "correct_loss_per_token": 1.937620997428894, "incorrect_loss_per_token": 1.2874727646509807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0592988729476929, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0592988729476929, "logits_per_char": -0.5296494364738464, "num_chars": 2}, {"sum_logits": -1.2204824686050415, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.2204824686050415, "logits_per_char": -0.6102412343025208, "num_chars": 2}, {"sum_logits": -1.937620997428894, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.937620997428894, "logits_per_char": -0.968810498714447, "num_chars": 2}, {"sum_logits": -1.5826369524002075, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5826369524002075, "logits_per_char": -0.7913184762001038, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 223, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6306971311569214, "incorrect_loss_raw": 1.400315801302592, "correct_loss_per_char": 0.8153485655784607, "incorrect_loss_per_char": 0.700157900651296, "correct_loss_per_token": 1.6306971311569214, "incorrect_loss_per_token": 1.400315801302592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0024217367172241, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0024217367172241, "logits_per_char": -0.5012108683586121, "num_chars": 2}, {"sum_logits": -1.2564119100570679, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2564119100570679, "logits_per_char": -0.6282059550285339, "num_chars": 2}, {"sum_logits": -1.9421137571334839, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9421137571334839, "logits_per_char": -0.9710568785667419, "num_chars": 2}, {"sum_logits": -1.6306971311569214, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.6306971311569214, "logits_per_char": -0.8153485655784607, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 224, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2530543804168701, "incorrect_loss_raw": 1.5183158715565999, "correct_loss_per_char": 0.6265271902084351, "incorrect_loss_per_char": 0.7591579357782999, "correct_loss_per_token": 1.2530543804168701, "incorrect_loss_per_token": 1.5183158715565999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0112526416778564, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.0112526416778564, "logits_per_char": -0.5056263208389282, "num_chars": 2}, {"sum_logits": -1.2530543804168701, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.2530543804168701, "logits_per_char": -0.6265271902084351, "num_chars": 2}, {"sum_logits": -1.8379578590393066, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.8379578590393066, "logits_per_char": -0.9189789295196533, "num_chars": 2}, {"sum_logits": -1.7057371139526367, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.7057371139526367, "logits_per_char": -0.8528685569763184, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 225, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6036626100540161, "incorrect_loss_raw": 1.3802273670832317, "correct_loss_per_char": 0.8018313050270081, "incorrect_loss_per_char": 0.6901136835416158, "correct_loss_per_token": 1.6036626100540161, "incorrect_loss_per_token": 1.3802273670832317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.146338701248169, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.146338701248169, "logits_per_char": -0.5731693506240845, "num_chars": 2}, {"sum_logits": -1.1614484786987305, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1614484786987305, "logits_per_char": -0.5807242393493652, "num_chars": 2}, {"sum_logits": -1.8328949213027954, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.8328949213027954, "logits_per_char": -0.9164474606513977, "num_chars": 2}, {"sum_logits": -1.6036626100540161, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.6036626100540161, "logits_per_char": -0.8018313050270081, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 226, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.218759298324585, "incorrect_loss_raw": 1.4651616017023723, "correct_loss_per_char": 0.6093796491622925, "incorrect_loss_per_char": 0.7325808008511862, "correct_loss_per_token": 1.218759298324585, "incorrect_loss_per_token": 1.4651616017023723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.218759298324585, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.218759298324585, "logits_per_char": -0.6093796491622925, "num_chars": 2}, {"sum_logits": -1.35683274269104, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.35683274269104, "logits_per_char": -0.67841637134552, "num_chars": 2}, {"sum_logits": -1.6260130405426025, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6260130405426025, "logits_per_char": -0.8130065202713013, "num_chars": 2}, {"sum_logits": -1.4126390218734741, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4126390218734741, "logits_per_char": -0.7063195109367371, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 227, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3600279092788696, "incorrect_loss_raw": 1.437761902809143, "correct_loss_per_char": 0.6800139546394348, "incorrect_loss_per_char": 0.7188809514045715, "correct_loss_per_token": 1.3600279092788696, "incorrect_loss_per_token": 1.437761902809143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2768425941467285, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.2768425941467285, "logits_per_char": -0.6384212970733643, "num_chars": 2}, {"sum_logits": -1.2457653284072876, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.2457653284072876, "logits_per_char": -0.6228826642036438, "num_chars": 2}, {"sum_logits": -1.790677785873413, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.790677785873413, "logits_per_char": -0.8953388929367065, "num_chars": 2}, {"sum_logits": -1.3600279092788696, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.3600279092788696, "logits_per_char": -0.6800139546394348, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 228, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.524983286857605, "incorrect_loss_raw": 1.36273189385732, "correct_loss_per_char": 0.7624916434288025, "incorrect_loss_per_char": 0.68136594692866, "correct_loss_per_token": 1.524983286857605, "incorrect_loss_per_token": 1.36273189385732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411784172058105, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.3411784172058105, "logits_per_char": -0.6705892086029053, "num_chars": 2}, {"sum_logits": -1.2126046419143677, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -1.2126046419143677, "logits_per_char": -0.6063023209571838, "num_chars": 2}, {"sum_logits": -1.524983286857605, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.524983286857605, "logits_per_char": -0.7624916434288025, "num_chars": 2}, {"sum_logits": -1.5344126224517822, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.5344126224517822, "logits_per_char": -0.7672063112258911, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 229, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2261648178100586, "incorrect_loss_raw": 1.464470624923706, "correct_loss_per_char": 0.6130824089050293, "incorrect_loss_per_char": 0.732235312461853, "correct_loss_per_token": 1.2261648178100586, "incorrect_loss_per_token": 1.464470624923706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3218164443969727, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.3218164443969727, "logits_per_char": -0.6609082221984863, "num_chars": 2}, {"sum_logits": -1.2261648178100586, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -1.2261648178100586, "logits_per_char": -0.6130824089050293, "num_chars": 2}, {"sum_logits": -1.6372652053833008, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.6372652053833008, "logits_per_char": -0.8186326026916504, "num_chars": 2}, {"sum_logits": -1.4343302249908447, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.4343302249908447, "logits_per_char": -0.7171651124954224, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 230, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.057219982147217, "incorrect_loss_raw": 1.2749541997909546, "correct_loss_per_char": 1.0286099910736084, "incorrect_loss_per_char": 0.6374770998954773, "correct_loss_per_token": 2.057219982147217, "incorrect_loss_per_token": 1.2749541997909546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0424494743347168, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.0424494743347168, "logits_per_char": -0.5212247371673584, "num_chars": 2}, {"sum_logits": -1.1651158332824707, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.1651158332824707, "logits_per_char": -0.5825579166412354, "num_chars": 2}, {"sum_logits": -2.057219982147217, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -2.057219982147217, "logits_per_char": -1.0286099910736084, "num_chars": 2}, {"sum_logits": -1.6172972917556763, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.6172972917556763, "logits_per_char": -0.8086486458778381, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 231, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.158963918685913, "incorrect_loss_raw": 1.5534497102101643, "correct_loss_per_char": 0.5794819593429565, "incorrect_loss_per_char": 0.7767248551050822, "correct_loss_per_token": 1.158963918685913, "incorrect_loss_per_token": 1.5534497102101643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0793548822402954, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.0793548822402954, "logits_per_char": -0.5396774411201477, "num_chars": 2}, {"sum_logits": -1.158963918685913, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.158963918685913, "logits_per_char": -0.5794819593429565, "num_chars": 2}, {"sum_logits": -1.9603803157806396, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.9603803157806396, "logits_per_char": -0.9801901578903198, "num_chars": 2}, {"sum_logits": -1.620613932609558, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.620613932609558, "logits_per_char": -0.810306966304779, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 232, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.696742057800293, "incorrect_loss_raw": 1.315838138262431, "correct_loss_per_char": 0.8483710289001465, "incorrect_loss_per_char": 0.6579190691312155, "correct_loss_per_token": 1.696742057800293, "incorrect_loss_per_token": 1.315838138262431, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1694309711456299, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -1.1694309711456299, "logits_per_char": -0.5847154855728149, "num_chars": 2}, {"sum_logits": -1.4482662677764893, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.4482662677764893, "logits_per_char": -0.7241331338882446, "num_chars": 2}, {"sum_logits": -1.696742057800293, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.696742057800293, "logits_per_char": -0.8483710289001465, "num_chars": 2}, {"sum_logits": -1.3298171758651733, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.3298171758651733, "logits_per_char": -0.6649085879325867, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 233, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4343457221984863, "incorrect_loss_raw": 1.4127203226089478, "correct_loss_per_char": 0.7171728610992432, "incorrect_loss_per_char": 0.7063601613044739, "correct_loss_per_token": 1.4343457221984863, "incorrect_loss_per_token": 1.4127203226089478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.08793044090271, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.08793044090271, "logits_per_char": -0.543965220451355, "num_chars": 2}, {"sum_logits": -1.4506134986877441, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4506134986877441, "logits_per_char": -0.7253067493438721, "num_chars": 2}, {"sum_logits": -1.6996170282363892, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.6996170282363892, "logits_per_char": -0.8498085141181946, "num_chars": 2}, {"sum_logits": -1.4343457221984863, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4343457221984863, "logits_per_char": -0.7171728610992432, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 234, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1109097003936768, "incorrect_loss_raw": 1.5469728310902913, "correct_loss_per_char": 0.5554548501968384, "incorrect_loss_per_char": 0.7734864155451456, "correct_loss_per_token": 1.1109097003936768, "incorrect_loss_per_token": 1.5469728310902913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1109097003936768, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -1.1109097003936768, "logits_per_char": -0.5554548501968384, "num_chars": 2}, {"sum_logits": -1.1872611045837402, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.1872611045837402, "logits_per_char": -0.5936305522918701, "num_chars": 2}, {"sum_logits": -1.8234833478927612, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.8234833478927612, "logits_per_char": -0.9117416739463806, "num_chars": 2}, {"sum_logits": -1.6301740407943726, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.6301740407943726, "logits_per_char": -0.8150870203971863, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 235, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9223957061767578, "incorrect_loss_raw": 1.3115925192832947, "correct_loss_per_char": 0.9611978530883789, "incorrect_loss_per_char": 0.6557962596416473, "correct_loss_per_token": 1.9223957061767578, "incorrect_loss_per_token": 1.3115925192832947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9823072552680969, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.9823072552680969, "logits_per_char": -0.49115362763404846, "num_chars": 2}, {"sum_logits": -1.2328650951385498, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.2328650951385498, "logits_per_char": -0.6164325475692749, "num_chars": 2}, {"sum_logits": -1.9223957061767578, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.9223957061767578, "logits_per_char": -0.9611978530883789, "num_chars": 2}, {"sum_logits": -1.7196052074432373, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.7196052074432373, "logits_per_char": -0.8598026037216187, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 236, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2432399988174438, "incorrect_loss_raw": 1.522537390391032, "correct_loss_per_char": 0.6216199994087219, "incorrect_loss_per_char": 0.761268695195516, "correct_loss_per_token": 1.2432399988174438, "incorrect_loss_per_token": 1.522537390391032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0266631841659546, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.0266631841659546, "logits_per_char": -0.5133315920829773, "num_chars": 2}, {"sum_logits": -1.2432399988174438, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2432399988174438, "logits_per_char": -0.6216199994087219, "num_chars": 2}, {"sum_logits": -1.9221444129943848, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.9221444129943848, "logits_per_char": -0.9610722064971924, "num_chars": 2}, {"sum_logits": -1.6188045740127563, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.6188045740127563, "logits_per_char": -0.8094022870063782, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 237, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.991492748260498, "incorrect_loss_raw": 1.2754440704981487, "correct_loss_per_char": 0.995746374130249, "incorrect_loss_per_char": 0.6377220352490743, "correct_loss_per_token": 1.991492748260498, "incorrect_loss_per_token": 1.2754440704981487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.110439419746399, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.110439419746399, "logits_per_char": -0.5552197098731995, "num_chars": 2}, {"sum_logits": -1.1591930389404297, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1591930389404297, "logits_per_char": -0.5795965194702148, "num_chars": 2}, {"sum_logits": -1.991492748260498, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.991492748260498, "logits_per_char": -0.995746374130249, "num_chars": 2}, {"sum_logits": -1.5566997528076172, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.5566997528076172, "logits_per_char": -0.7783498764038086, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 238, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.128341555595398, "incorrect_loss_raw": 1.564801534016927, "correct_loss_per_char": 0.564170777797699, "incorrect_loss_per_char": 0.7824007670084635, "correct_loss_per_token": 1.128341555595398, "incorrect_loss_per_token": 1.564801534016927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.128341555595398, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.128341555595398, "logits_per_char": -0.564170777797699, "num_chars": 2}, {"sum_logits": -1.0973221063613892, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0973221063613892, "logits_per_char": -0.5486610531806946, "num_chars": 2}, {"sum_logits": -1.9257783889770508, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.9257783889770508, "logits_per_char": -0.9628891944885254, "num_chars": 2}, {"sum_logits": -1.6713041067123413, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.6713041067123413, "logits_per_char": -0.8356520533561707, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 239, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.638737678527832, "incorrect_loss_raw": 1.3966307242711384, "correct_loss_per_char": 0.819368839263916, "incorrect_loss_per_char": 0.6983153621355692, "correct_loss_per_token": 1.638737678527832, "incorrect_loss_per_token": 1.3966307242711384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0701377391815186, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": true, "logits_per_token": -1.0701377391815186, "logits_per_char": -0.5350688695907593, "num_chars": 2}, {"sum_logits": -1.164524793624878, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.164524793624878, "logits_per_char": -0.582262396812439, "num_chars": 2}, {"sum_logits": -1.955229640007019, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.955229640007019, "logits_per_char": -0.9776148200035095, "num_chars": 2}, {"sum_logits": -1.638737678527832, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.638737678527832, "logits_per_char": -0.819368839263916, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 240, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9815998077392578, "incorrect_loss_raw": 1.3078942100207012, "correct_loss_per_char": 0.9907999038696289, "incorrect_loss_per_char": 0.6539471050103506, "correct_loss_per_token": 1.9815998077392578, "incorrect_loss_per_token": 1.3078942100207012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9470321536064148, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.9470321536064148, "logits_per_char": -0.4735160768032074, "num_chars": 2}, {"sum_logits": -1.2562456130981445, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2562456130981445, "logits_per_char": -0.6281228065490723, "num_chars": 2}, {"sum_logits": -1.9815998077392578, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.9815998077392578, "logits_per_char": -0.9907999038696289, "num_chars": 2}, {"sum_logits": -1.720404863357544, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.720404863357544, "logits_per_char": -0.860202431678772, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 241, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.839489221572876, "incorrect_loss_raw": 1.304561694463094, "correct_loss_per_char": 0.919744610786438, "incorrect_loss_per_char": 0.652280847231547, "correct_loss_per_token": 1.839489221572876, "incorrect_loss_per_token": 1.304561694463094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0506535768508911, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0506535768508911, "logits_per_char": -0.5253267884254456, "num_chars": 2}, {"sum_logits": -1.2814453840255737, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2814453840255737, "logits_per_char": -0.6407226920127869, "num_chars": 2}, {"sum_logits": -1.839489221572876, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.839489221572876, "logits_per_char": -0.919744610786438, "num_chars": 2}, {"sum_logits": -1.5815861225128174, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5815861225128174, "logits_per_char": -0.7907930612564087, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 242, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.039757490158081, "incorrect_loss_raw": 1.588619629542033, "correct_loss_per_char": 0.5198787450790405, "incorrect_loss_per_char": 0.7943098147710165, "correct_loss_per_token": 1.039757490158081, "incorrect_loss_per_token": 1.588619629542033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.039757490158081, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -1.039757490158081, "logits_per_char": -0.5198787450790405, "num_chars": 2}, {"sum_logits": -1.2132487297058105, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.2132487297058105, "logits_per_char": -0.6066243648529053, "num_chars": 2}, {"sum_logits": -1.8459868431091309, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.8459868431091309, "logits_per_char": -0.9229934215545654, "num_chars": 2}, {"sum_logits": -1.7066233158111572, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.7066233158111572, "logits_per_char": -0.8533116579055786, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 243, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9826457500457764, "incorrect_loss_raw": 1.277384916941325, "correct_loss_per_char": 0.9913228750228882, "incorrect_loss_per_char": 0.6386924584706625, "correct_loss_per_token": 1.9826457500457764, "incorrect_loss_per_token": 1.277384916941325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0334725379943848, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0334725379943848, "logits_per_char": -0.5167362689971924, "num_chars": 2}, {"sum_logits": -1.2607970237731934, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2607970237731934, "logits_per_char": -0.6303985118865967, "num_chars": 2}, {"sum_logits": -1.9826457500457764, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9826457500457764, "logits_per_char": -0.9913228750228882, "num_chars": 2}, {"sum_logits": -1.5378851890563965, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5378851890563965, "logits_per_char": -0.7689425945281982, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 244, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4367836713790894, "incorrect_loss_raw": 1.406397819519043, "correct_loss_per_char": 0.7183918356895447, "incorrect_loss_per_char": 0.7031989097595215, "correct_loss_per_token": 1.4367836713790894, "incorrect_loss_per_token": 1.406397819519043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1648331880569458, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.1648331880569458, "logits_per_char": -0.5824165940284729, "num_chars": 2}, {"sum_logits": -1.3286622762680054, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.3286622762680054, "logits_per_char": -0.6643311381340027, "num_chars": 2}, {"sum_logits": -1.7256979942321777, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7256979942321777, "logits_per_char": -0.8628489971160889, "num_chars": 2}, {"sum_logits": -1.4367836713790894, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4367836713790894, "logits_per_char": -0.7183918356895447, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 245, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9578646421432495, "incorrect_loss_raw": 1.2895392576853435, "correct_loss_per_char": 0.9789323210716248, "incorrect_loss_per_char": 0.6447696288426717, "correct_loss_per_token": 1.9578646421432495, "incorrect_loss_per_token": 1.2895392576853435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1203147172927856, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.1203147172927856, "logits_per_char": -0.5601573586463928, "num_chars": 2}, {"sum_logits": -1.1073191165924072, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.1073191165924072, "logits_per_char": -0.5536595582962036, "num_chars": 2}, {"sum_logits": -1.9578646421432495, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9578646421432495, "logits_per_char": -0.9789323210716248, "num_chars": 2}, {"sum_logits": -1.6409839391708374, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.6409839391708374, "logits_per_char": -0.8204919695854187, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 246, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2660210132598877, "incorrect_loss_raw": 1.5209523836771648, "correct_loss_per_char": 0.6330105066299438, "incorrect_loss_per_char": 0.7604761918385824, "correct_loss_per_token": 1.2660210132598877, "incorrect_loss_per_token": 1.5209523836771648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.99849534034729, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.99849534034729, "logits_per_char": -0.499247670173645, "num_chars": 2}, {"sum_logits": -1.2660210132598877, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.2660210132598877, "logits_per_char": -0.6330105066299438, "num_chars": 2}, {"sum_logits": -1.9374825954437256, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.9374825954437256, "logits_per_char": -0.9687412977218628, "num_chars": 2}, {"sum_logits": -1.6268792152404785, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.6268792152404785, "logits_per_char": -0.8134396076202393, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 247, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6637715101242065, "incorrect_loss_raw": 1.3294874429702759, "correct_loss_per_char": 0.8318857550621033, "incorrect_loss_per_char": 0.6647437214851379, "correct_loss_per_token": 1.6637715101242065, "incorrect_loss_per_token": 1.3294874429702759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2291233539581299, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.2291233539581299, "logits_per_char": -0.6145616769790649, "num_chars": 2}, {"sum_logits": -1.2208653688430786, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -1.2208653688430786, "logits_per_char": -0.6104326844215393, "num_chars": 2}, {"sum_logits": -1.6637715101242065, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.6637715101242065, "logits_per_char": -0.8318857550621033, "num_chars": 2}, {"sum_logits": -1.5384736061096191, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.5384736061096191, "logits_per_char": -0.7692368030548096, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 248, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0618482828140259, "incorrect_loss_raw": 1.5952194134394329, "correct_loss_per_char": 0.5309241414070129, "incorrect_loss_per_char": 0.7976097067197164, "correct_loss_per_token": 1.0618482828140259, "incorrect_loss_per_token": 1.5952194134394329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0618482828140259, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.0618482828140259, "logits_per_char": -0.5309241414070129, "num_chars": 2}, {"sum_logits": -1.1742838621139526, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.1742838621139526, "logits_per_char": -0.5871419310569763, "num_chars": 2}, {"sum_logits": -2.0387730598449707, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -2.0387730598449707, "logits_per_char": -1.0193865299224854, "num_chars": 2}, {"sum_logits": -1.572601318359375, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.572601318359375, "logits_per_char": -0.7863006591796875, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 249, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1967462301254272, "incorrect_loss_raw": 1.519890268643697, "correct_loss_per_char": 0.5983731150627136, "incorrect_loss_per_char": 0.7599451343218485, "correct_loss_per_token": 1.1967462301254272, "incorrect_loss_per_token": 1.519890268643697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1349570751190186, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": true, "logits_per_token": -1.1349570751190186, "logits_per_char": -0.5674785375595093, "num_chars": 2}, {"sum_logits": -1.1967462301254272, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.1967462301254272, "logits_per_char": -0.5983731150627136, "num_chars": 2}, {"sum_logits": -1.8815248012542725, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.8815248012542725, "logits_per_char": -0.9407624006271362, "num_chars": 2}, {"sum_logits": -1.5431889295578003, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.5431889295578003, "logits_per_char": -0.7715944647789001, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 250, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0745238065719604, "incorrect_loss_raw": 1.5605826377868652, "correct_loss_per_char": 0.5372619032859802, "incorrect_loss_per_char": 0.7802913188934326, "correct_loss_per_token": 1.0745238065719604, "incorrect_loss_per_token": 1.5605826377868652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0745238065719604, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.0745238065719604, "logits_per_char": -0.5372619032859802, "num_chars": 2}, {"sum_logits": -1.2045636177062988, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2045636177062988, "logits_per_char": -0.6022818088531494, "num_chars": 2}, {"sum_logits": -1.7411320209503174, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.7411320209503174, "logits_per_char": -0.8705660104751587, "num_chars": 2}, {"sum_logits": -1.7360522747039795, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.7360522747039795, "logits_per_char": -0.8680261373519897, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 251, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0988214015960693, "incorrect_loss_raw": 1.5540355443954468, "correct_loss_per_char": 0.5494107007980347, "incorrect_loss_per_char": 0.7770177721977234, "correct_loss_per_token": 1.0988214015960693, "incorrect_loss_per_token": 1.5540355443954468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0988214015960693, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.0988214015960693, "logits_per_char": -0.5494107007980347, "num_chars": 2}, {"sum_logits": -1.2126880884170532, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2126880884170532, "logits_per_char": -0.6063440442085266, "num_chars": 2}, {"sum_logits": -1.9105827808380127, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.9105827808380127, "logits_per_char": -0.9552913904190063, "num_chars": 2}, {"sum_logits": -1.5388357639312744, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5388357639312744, "logits_per_char": -0.7694178819656372, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 252, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1251606941223145, "incorrect_loss_raw": 1.550191322962443, "correct_loss_per_char": 0.5625803470611572, "incorrect_loss_per_char": 0.7750956614812216, "correct_loss_per_token": 1.1251606941223145, "incorrect_loss_per_token": 1.550191322962443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1251606941223145, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1251606941223145, "logits_per_char": -0.5625803470611572, "num_chars": 2}, {"sum_logits": -1.197522759437561, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.197522759437561, "logits_per_char": -0.5987613797187805, "num_chars": 2}, {"sum_logits": -1.9529647827148438, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.9529647827148438, "logits_per_char": -0.9764823913574219, "num_chars": 2}, {"sum_logits": -1.5000864267349243, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.5000864267349243, "logits_per_char": -0.7500432133674622, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 253, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7190699577331543, "incorrect_loss_raw": 1.4053324063618977, "correct_loss_per_char": 0.8595349788665771, "incorrect_loss_per_char": 0.7026662031809489, "correct_loss_per_token": 1.7190699577331543, "incorrect_loss_per_token": 1.4053324063618977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9389266967773438, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.9389266967773438, "logits_per_char": -0.4694633483886719, "num_chars": 2}, {"sum_logits": -1.2382326126098633, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.2382326126098633, "logits_per_char": -0.6191163063049316, "num_chars": 2}, {"sum_logits": -2.0388379096984863, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.0388379096984863, "logits_per_char": -1.0194189548492432, "num_chars": 2}, {"sum_logits": -1.7190699577331543, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.7190699577331543, "logits_per_char": -0.8595349788665771, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 254, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5839132070541382, "incorrect_loss_raw": 1.3926429748535156, "correct_loss_per_char": 0.7919566035270691, "incorrect_loss_per_char": 0.6963214874267578, "correct_loss_per_token": 1.5839132070541382, "incorrect_loss_per_token": 1.3926429748535156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0996659994125366, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0996659994125366, "logits_per_char": -0.5498329997062683, "num_chars": 2}, {"sum_logits": -1.2048803567886353, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2048803567886353, "logits_per_char": -0.6024401783943176, "num_chars": 2}, {"sum_logits": -1.873382568359375, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.873382568359375, "logits_per_char": -0.9366912841796875, "num_chars": 2}, {"sum_logits": -1.5839132070541382, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5839132070541382, "logits_per_char": -0.7919566035270691, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 255, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0434036254882812, "incorrect_loss_raw": 1.2843741575876872, "correct_loss_per_char": 1.0217018127441406, "incorrect_loss_per_char": 0.6421870787938436, "correct_loss_per_token": 2.0434036254882812, "incorrect_loss_per_token": 1.2843741575876872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0202131271362305, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -1.0202131271362305, "logits_per_char": -0.5101065635681152, "num_chars": 2}, {"sum_logits": -1.1698532104492188, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.1698532104492188, "logits_per_char": -0.5849266052246094, "num_chars": 2}, {"sum_logits": -2.0434036254882812, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -2.0434036254882812, "logits_per_char": -1.0217018127441406, "num_chars": 2}, {"sum_logits": -1.6630561351776123, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.6630561351776123, "logits_per_char": -0.8315280675888062, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 256, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5306752920150757, "incorrect_loss_raw": 1.396073579788208, "correct_loss_per_char": 0.7653376460075378, "incorrect_loss_per_char": 0.698036789894104, "correct_loss_per_token": 1.5306752920150757, "incorrect_loss_per_token": 1.396073579788208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2574810981750488, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.2574810981750488, "logits_per_char": -0.6287405490875244, "num_chars": 2}, {"sum_logits": -1.1091071367263794, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1091071367263794, "logits_per_char": -0.5545535683631897, "num_chars": 2}, {"sum_logits": -1.8216325044631958, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.8216325044631958, "logits_per_char": -0.9108162522315979, "num_chars": 2}, {"sum_logits": -1.5306752920150757, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.5306752920150757, "logits_per_char": -0.7653376460075378, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 257, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.170689344406128, "incorrect_loss_raw": 1.5018529494603474, "correct_loss_per_char": 0.585344672203064, "incorrect_loss_per_char": 0.7509264747301737, "correct_loss_per_token": 1.170689344406128, "incorrect_loss_per_token": 1.5018529494603474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.170689344406128, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -1.170689344406128, "logits_per_char": -0.585344672203064, "num_chars": 2}, {"sum_logits": -1.2118196487426758, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.2118196487426758, "logits_per_char": -0.6059098243713379, "num_chars": 2}, {"sum_logits": -1.6556074619293213, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6556074619293213, "logits_per_char": -0.8278037309646606, "num_chars": 2}, {"sum_logits": -1.6381317377090454, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6381317377090454, "logits_per_char": -0.8190658688545227, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 258, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1460959911346436, "incorrect_loss_raw": 1.5191756884257, "correct_loss_per_char": 0.5730479955673218, "incorrect_loss_per_char": 0.75958784421285, "correct_loss_per_token": 1.1460959911346436, "incorrect_loss_per_token": 1.5191756884257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1460959911346436, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.1460959911346436, "logits_per_char": -0.5730479955673218, "num_chars": 2}, {"sum_logits": -1.2244961261749268, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.2244961261749268, "logits_per_char": -0.6122480630874634, "num_chars": 2}, {"sum_logits": -1.7807574272155762, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.7807574272155762, "logits_per_char": -0.8903787136077881, "num_chars": 2}, {"sum_logits": -1.5522735118865967, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.5522735118865967, "logits_per_char": -0.7761367559432983, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 259, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.216463327407837, "incorrect_loss_raw": 1.518838882446289, "correct_loss_per_char": 0.6082316637039185, "incorrect_loss_per_char": 0.7594194412231445, "correct_loss_per_token": 1.216463327407837, "incorrect_loss_per_token": 1.518838882446289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0826328992843628, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.0826328992843628, "logits_per_char": -0.5413164496421814, "num_chars": 2}, {"sum_logits": -1.216463327407837, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.216463327407837, "logits_per_char": -0.6082316637039185, "num_chars": 2}, {"sum_logits": -1.9045953750610352, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.9045953750610352, "logits_per_char": -0.9522976875305176, "num_chars": 2}, {"sum_logits": -1.5692883729934692, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.5692883729934692, "logits_per_char": -0.7846441864967346, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 260, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8874436616897583, "incorrect_loss_raw": 1.7217715581258137, "correct_loss_per_char": 0.44372183084487915, "incorrect_loss_per_char": 0.8608857790629069, "correct_loss_per_token": 0.8874436616897583, "incorrect_loss_per_token": 1.7217715581258137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8874436616897583, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.8874436616897583, "logits_per_char": -0.44372183084487915, "num_chars": 2}, {"sum_logits": -1.2026422023773193, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2026422023773193, "logits_per_char": -0.6013211011886597, "num_chars": 2}, {"sum_logits": -2.147608757019043, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -2.147608757019043, "logits_per_char": -1.0738043785095215, "num_chars": 2}, {"sum_logits": -1.815063714981079, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.815063714981079, "logits_per_char": -0.9075318574905396, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 261, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2195653915405273, "incorrect_loss_raw": 1.51072891553243, "correct_loss_per_char": 0.6097826957702637, "incorrect_loss_per_char": 0.755364457766215, "correct_loss_per_token": 1.2195653915405273, "incorrect_loss_per_token": 1.51072891553243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0891307592391968, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -1.0891307592391968, "logits_per_char": -0.5445653796195984, "num_chars": 2}, {"sum_logits": -1.2195653915405273, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.2195653915405273, "logits_per_char": -0.6097826957702637, "num_chars": 2}, {"sum_logits": -1.8213660717010498, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.8213660717010498, "logits_per_char": -0.9106830358505249, "num_chars": 2}, {"sum_logits": -1.6216899156570435, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.6216899156570435, "logits_per_char": -0.8108449578285217, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 262, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3683526515960693, "incorrect_loss_raw": 1.4437801837921143, "correct_loss_per_char": 0.6841763257980347, "incorrect_loss_per_char": 0.7218900918960571, "correct_loss_per_token": 1.3683526515960693, "incorrect_loss_per_token": 1.4437801837921143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1074469089508057, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -1.1074469089508057, "logits_per_char": -0.5537234544754028, "num_chars": 2}, {"sum_logits": -1.3683526515960693, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.3683526515960693, "logits_per_char": -0.6841763257980347, "num_chars": 2}, {"sum_logits": -1.8172396421432495, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.8172396421432495, "logits_per_char": -0.9086198210716248, "num_chars": 2}, {"sum_logits": -1.4066540002822876, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.4066540002822876, "logits_per_char": -0.7033270001411438, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 263, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8801988363265991, "incorrect_loss_raw": 1.289253830909729, "correct_loss_per_char": 0.9400994181632996, "incorrect_loss_per_char": 0.6446269154548645, "correct_loss_per_token": 1.8801988363265991, "incorrect_loss_per_token": 1.289253830909729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1273483037948608, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -1.1273483037948608, "logits_per_char": -0.5636741518974304, "num_chars": 2}, {"sum_logits": -1.2114492654800415, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.2114492654800415, "logits_per_char": -0.6057246327400208, "num_chars": 2}, {"sum_logits": -1.8801988363265991, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.8801988363265991, "logits_per_char": -0.9400994181632996, "num_chars": 2}, {"sum_logits": -1.5289639234542847, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.5289639234542847, "logits_per_char": -0.7644819617271423, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 264, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0812163352966309, "incorrect_loss_raw": 1.5771363576253254, "correct_loss_per_char": 0.5406081676483154, "incorrect_loss_per_char": 0.7885681788126627, "correct_loss_per_token": 1.0812163352966309, "incorrect_loss_per_token": 1.5771363576253254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0812163352966309, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.0812163352966309, "logits_per_char": -0.5406081676483154, "num_chars": 2}, {"sum_logits": -1.1437550783157349, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.1437550783157349, "logits_per_char": -0.5718775391578674, "num_chars": 2}, {"sum_logits": -1.8567192554473877, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.8567192554473877, "logits_per_char": -0.9283596277236938, "num_chars": 2}, {"sum_logits": -1.730934739112854, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.730934739112854, "logits_per_char": -0.865467369556427, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 265, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6291475296020508, "incorrect_loss_raw": 1.3995731671651204, "correct_loss_per_char": 0.8145737648010254, "incorrect_loss_per_char": 0.6997865835825602, "correct_loss_per_token": 1.6291475296020508, "incorrect_loss_per_token": 1.3995731671651204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0405588150024414, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0405588150024414, "logits_per_char": -0.5202794075012207, "num_chars": 2}, {"sum_logits": -1.2074296474456787, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.2074296474456787, "logits_per_char": -0.6037148237228394, "num_chars": 2}, {"sum_logits": -1.9507310390472412, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.9507310390472412, "logits_per_char": -0.9753655195236206, "num_chars": 2}, {"sum_logits": -1.6291475296020508, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6291475296020508, "logits_per_char": -0.8145737648010254, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 266, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3081138134002686, "incorrect_loss_raw": 1.4487336079279582, "correct_loss_per_char": 0.6540569067001343, "incorrect_loss_per_char": 0.7243668039639791, "correct_loss_per_token": 1.3081138134002686, "incorrect_loss_per_token": 1.4487336079279582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1351966857910156, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -1.1351966857910156, "logits_per_char": -0.5675983428955078, "num_chars": 2}, {"sum_logits": -1.3081138134002686, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3081138134002686, "logits_per_char": -0.6540569067001343, "num_chars": 2}, {"sum_logits": -1.6528853178024292, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.6528853178024292, "logits_per_char": -0.8264426589012146, "num_chars": 2}, {"sum_logits": -1.5581188201904297, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5581188201904297, "logits_per_char": -0.7790594100952148, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 267, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1295883655548096, "incorrect_loss_raw": 1.5049440066019695, "correct_loss_per_char": 0.5647941827774048, "incorrect_loss_per_char": 0.7524720033009847, "correct_loss_per_token": 1.1295883655548096, "incorrect_loss_per_token": 1.5049440066019695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1295883655548096, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -1.1295883655548096, "logits_per_char": -0.5647941827774048, "num_chars": 2}, {"sum_logits": -1.4291731119155884, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.4291731119155884, "logits_per_char": -0.7145865559577942, "num_chars": 2}, {"sum_logits": -1.6410229206085205, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.6410229206085205, "logits_per_char": -0.8205114603042603, "num_chars": 2}, {"sum_logits": -1.4446359872817993, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.4446359872817993, "logits_per_char": -0.7223179936408997, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 268, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0784523487091064, "incorrect_loss_raw": 1.5752304395039876, "correct_loss_per_char": 0.5392261743545532, "incorrect_loss_per_char": 0.7876152197519938, "correct_loss_per_token": 1.0784523487091064, "incorrect_loss_per_token": 1.5752304395039876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0784523487091064, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0784523487091064, "logits_per_char": -0.5392261743545532, "num_chars": 2}, {"sum_logits": -1.2011222839355469, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2011222839355469, "logits_per_char": -0.6005611419677734, "num_chars": 2}, {"sum_logits": -1.9525833129882812, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.9525833129882812, "logits_per_char": -0.9762916564941406, "num_chars": 2}, {"sum_logits": -1.5719857215881348, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5719857215881348, "logits_per_char": -0.7859928607940674, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 269, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7148330211639404, "incorrect_loss_raw": 1.3110570112864177, "correct_loss_per_char": 0.8574165105819702, "incorrect_loss_per_char": 0.6555285056432089, "correct_loss_per_token": 1.7148330211639404, "incorrect_loss_per_token": 1.3110570112864177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1690293550491333, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.1690293550491333, "logits_per_char": -0.5845146775245667, "num_chars": 2}, {"sum_logits": -1.4212654829025269, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4212654829025269, "logits_per_char": -0.7106327414512634, "num_chars": 2}, {"sum_logits": -1.7148330211639404, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.7148330211639404, "logits_per_char": -0.8574165105819702, "num_chars": 2}, {"sum_logits": -1.3428761959075928, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.3428761959075928, "logits_per_char": -0.6714380979537964, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 270, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4065953493118286, "incorrect_loss_raw": 1.4233650366465251, "correct_loss_per_char": 0.7032976746559143, "incorrect_loss_per_char": 0.7116825183232626, "correct_loss_per_token": 1.4065953493118286, "incorrect_loss_per_token": 1.4233650366465251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1141760349273682, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.1141760349273682, "logits_per_char": -0.5570880174636841, "num_chars": 2}, {"sum_logits": -1.399661660194397, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.399661660194397, "logits_per_char": -0.6998308300971985, "num_chars": 2}, {"sum_logits": -1.75625741481781, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.75625741481781, "logits_per_char": -0.878128707408905, "num_chars": 2}, {"sum_logits": -1.4065953493118286, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4065953493118286, "logits_per_char": -0.7032976746559143, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 271, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0684881210327148, "incorrect_loss_raw": 1.5808992783228557, "correct_loss_per_char": 0.5342440605163574, "incorrect_loss_per_char": 0.7904496391614279, "correct_loss_per_token": 1.0684881210327148, "incorrect_loss_per_token": 1.5808992783228557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.148422122001648, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.148422122001648, "logits_per_char": -0.574211061000824, "num_chars": 2}, {"sum_logits": -1.0684881210327148, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -1.0684881210327148, "logits_per_char": -0.5342440605163574, "num_chars": 2}, {"sum_logits": -1.8908723592758179, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.8908723592758179, "logits_per_char": -0.9454361796379089, "num_chars": 2}, {"sum_logits": -1.703403353691101, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.703403353691101, "logits_per_char": -0.8517016768455505, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 272, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6961852312088013, "incorrect_loss_raw": 1.3240397771199544, "correct_loss_per_char": 0.8480926156044006, "incorrect_loss_per_char": 0.6620198885599772, "correct_loss_per_token": 1.6961852312088013, "incorrect_loss_per_token": 1.3240397771199544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2060762643814087, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -1.2060762643814087, "logits_per_char": -0.6030381321907043, "num_chars": 2}, {"sum_logits": -1.2075871229171753, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.2075871229171753, "logits_per_char": -0.6037935614585876, "num_chars": 2}, {"sum_logits": -1.6961852312088013, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6961852312088013, "logits_per_char": -0.8480926156044006, "num_chars": 2}, {"sum_logits": -1.5584559440612793, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.5584559440612793, "logits_per_char": -0.7792279720306396, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 273, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1845988035202026, "incorrect_loss_raw": 1.5274066527684529, "correct_loss_per_char": 0.5922994017601013, "incorrect_loss_per_char": 0.7637033263842264, "correct_loss_per_token": 1.1845988035202026, "incorrect_loss_per_token": 1.5274066527684529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1031394004821777, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.1031394004821777, "logits_per_char": -0.5515697002410889, "num_chars": 2}, {"sum_logits": -1.1845988035202026, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.1845988035202026, "logits_per_char": -0.5922994017601013, "num_chars": 2}, {"sum_logits": -1.868842363357544, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.868842363357544, "logits_per_char": -0.934421181678772, "num_chars": 2}, {"sum_logits": -1.6102381944656372, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.6102381944656372, "logits_per_char": -0.8051190972328186, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 274, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9094352722167969, "incorrect_loss_raw": 1.3173375129699707, "correct_loss_per_char": 0.9547176361083984, "incorrect_loss_per_char": 0.6586687564849854, "correct_loss_per_token": 1.9094352722167969, "incorrect_loss_per_token": 1.3173375129699707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0402189493179321, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": true, "logits_per_token": -1.0402189493179321, "logits_per_char": -0.5201094746589661, "num_chars": 2}, {"sum_logits": -1.147855281829834, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.147855281829834, "logits_per_char": -0.573927640914917, "num_chars": 2}, {"sum_logits": -1.9094352722167969, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.9094352722167969, "logits_per_char": -0.9547176361083984, "num_chars": 2}, {"sum_logits": -1.763938307762146, "num_tokens": 1, "num_tokens_all": 1107, "is_greedy": false, "logits_per_token": -1.763938307762146, "logits_per_char": -0.881969153881073, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 275, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2528878450393677, "incorrect_loss_raw": 1.5089270273844402, "correct_loss_per_char": 0.6264439225196838, "incorrect_loss_per_char": 0.7544635136922201, "correct_loss_per_token": 1.2528878450393677, "incorrect_loss_per_token": 1.5089270273844402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0550671815872192, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.0550671815872192, "logits_per_char": -0.5275335907936096, "num_chars": 2}, {"sum_logits": -1.2528878450393677, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.2528878450393677, "logits_per_char": -0.6264439225196838, "num_chars": 2}, {"sum_logits": -1.8797672986984253, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.8797672986984253, "logits_per_char": -0.9398836493492126, "num_chars": 2}, {"sum_logits": -1.5919466018676758, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.5919466018676758, "logits_per_char": -0.7959733009338379, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 276, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1283164024353027, "incorrect_loss_raw": 1.5751795371373494, "correct_loss_per_char": 0.5641582012176514, "incorrect_loss_per_char": 0.7875897685686747, "correct_loss_per_token": 1.1283164024353027, "incorrect_loss_per_token": 1.5751795371373494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0883760452270508, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.0883760452270508, "logits_per_char": -0.5441880226135254, "num_chars": 2}, {"sum_logits": -1.1283164024353027, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1283164024353027, "logits_per_char": -0.5641582012176514, "num_chars": 2}, {"sum_logits": -2.033838987350464, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -2.033838987350464, "logits_per_char": -1.016919493675232, "num_chars": 2}, {"sum_logits": -1.6033235788345337, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.6033235788345337, "logits_per_char": -0.8016617894172668, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 277, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.670717477798462, "incorrect_loss_raw": 1.401266058286031, "correct_loss_per_char": 0.835358738899231, "incorrect_loss_per_char": 0.7006330291430155, "correct_loss_per_token": 1.670717477798462, "incorrect_loss_per_token": 1.401266058286031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0389224290847778, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.0389224290847778, "logits_per_char": -0.5194612145423889, "num_chars": 2}, {"sum_logits": -1.1572859287261963, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.1572859287261963, "logits_per_char": -0.5786429643630981, "num_chars": 2}, {"sum_logits": -2.007589817047119, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -2.007589817047119, "logits_per_char": -1.0037949085235596, "num_chars": 2}, {"sum_logits": -1.670717477798462, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.670717477798462, "logits_per_char": -0.835358738899231, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 278, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7055445909500122, "incorrect_loss_raw": 1.3292965094248455, "correct_loss_per_char": 0.8527722954750061, "incorrect_loss_per_char": 0.6646482547124227, "correct_loss_per_token": 1.7055445909500122, "incorrect_loss_per_token": 1.3292965094248455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.155595302581787, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -1.155595302581787, "logits_per_char": -0.5777976512908936, "num_chars": 2}, {"sum_logits": -1.217126488685608, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.217126488685608, "logits_per_char": -0.608563244342804, "num_chars": 2}, {"sum_logits": -1.7055445909500122, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.7055445909500122, "logits_per_char": -0.8527722954750061, "num_chars": 2}, {"sum_logits": -1.6151677370071411, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6151677370071411, "logits_per_char": -0.8075838685035706, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 279, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0837229490280151, "incorrect_loss_raw": 1.5872416496276855, "correct_loss_per_char": 0.5418614745140076, "incorrect_loss_per_char": 0.7936208248138428, "correct_loss_per_token": 1.0837229490280151, "incorrect_loss_per_token": 1.5872416496276855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0837229490280151, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -1.0837229490280151, "logits_per_char": -0.5418614745140076, "num_chars": 2}, {"sum_logits": -1.1461825370788574, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.1461825370788574, "logits_per_char": -0.5730912685394287, "num_chars": 2}, {"sum_logits": -1.9753003120422363, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.9753003120422363, "logits_per_char": -0.9876501560211182, "num_chars": 2}, {"sum_logits": -1.640242099761963, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.640242099761963, "logits_per_char": -0.8201210498809814, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 280, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1394938230514526, "incorrect_loss_raw": 1.5387004216512044, "correct_loss_per_char": 0.5697469115257263, "incorrect_loss_per_char": 0.7693502108256022, "correct_loss_per_token": 1.1394938230514526, "incorrect_loss_per_token": 1.5387004216512044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1488865613937378, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1488865613937378, "logits_per_char": -0.5744432806968689, "num_chars": 2}, {"sum_logits": -1.1394938230514526, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.1394938230514526, "logits_per_char": -0.5697469115257263, "num_chars": 2}, {"sum_logits": -1.848441243171692, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.848441243171692, "logits_per_char": -0.924220621585846, "num_chars": 2}, {"sum_logits": -1.6187734603881836, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.6187734603881836, "logits_per_char": -0.8093867301940918, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 281, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1299207210540771, "incorrect_loss_raw": 1.5808755159378052, "correct_loss_per_char": 0.5649603605270386, "incorrect_loss_per_char": 0.7904377579689026, "correct_loss_per_token": 1.1299207210540771, "incorrect_loss_per_token": 1.5808755159378052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0592732429504395, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.0592732429504395, "logits_per_char": -0.5296366214752197, "num_chars": 2}, {"sum_logits": -1.1299207210540771, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.1299207210540771, "logits_per_char": -0.5649603605270386, "num_chars": 2}, {"sum_logits": -1.9938085079193115, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.9938085079193115, "logits_per_char": -0.9969042539596558, "num_chars": 2}, {"sum_logits": -1.6895447969436646, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.6895447969436646, "logits_per_char": -0.8447723984718323, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 282, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0765509605407715, "incorrect_loss_raw": 1.5677729447682698, "correct_loss_per_char": 0.5382754802703857, "incorrect_loss_per_char": 0.7838864723841349, "correct_loss_per_token": 1.0765509605407715, "incorrect_loss_per_token": 1.5677729447682698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0765509605407715, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -1.0765509605407715, "logits_per_char": -0.5382754802703857, "num_chars": 2}, {"sum_logits": -1.1944963932037354, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.1944963932037354, "logits_per_char": -0.5972481966018677, "num_chars": 2}, {"sum_logits": -1.821312665939331, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.821312665939331, "logits_per_char": -0.9106563329696655, "num_chars": 2}, {"sum_logits": -1.6875097751617432, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.6875097751617432, "logits_per_char": -0.8437548875808716, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 283, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0756107568740845, "incorrect_loss_raw": 1.564752181371053, "correct_loss_per_char": 0.5378053784370422, "incorrect_loss_per_char": 0.7823760906855265, "correct_loss_per_token": 1.0756107568740845, "incorrect_loss_per_token": 1.564752181371053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0756107568740845, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.0756107568740845, "logits_per_char": -0.5378053784370422, "num_chars": 2}, {"sum_logits": -1.237647294998169, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.237647294998169, "logits_per_char": -0.6188236474990845, "num_chars": 2}, {"sum_logits": -1.9087449312210083, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.9087449312210083, "logits_per_char": -0.9543724656105042, "num_chars": 2}, {"sum_logits": -1.547864317893982, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.547864317893982, "logits_per_char": -0.773932158946991, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 284, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.891233205795288, "incorrect_loss_raw": 1.3625067869822185, "correct_loss_per_char": 0.945616602897644, "incorrect_loss_per_char": 0.6812533934911092, "correct_loss_per_token": 1.891233205795288, "incorrect_loss_per_token": 1.3625067869822185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0407977104187012, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": true, "logits_per_token": -1.0407977104187012, "logits_per_char": -0.5203988552093506, "num_chars": 2}, {"sum_logits": -1.0665805339813232, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.0665805339813232, "logits_per_char": -0.5332902669906616, "num_chars": 2}, {"sum_logits": -1.9801421165466309, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.9801421165466309, "logits_per_char": -0.9900710582733154, "num_chars": 2}, {"sum_logits": -1.891233205795288, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.891233205795288, "logits_per_char": -0.945616602897644, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 285, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7846100330352783, "incorrect_loss_raw": 1.390841007232666, "correct_loss_per_char": 0.8923050165176392, "incorrect_loss_per_char": 0.695420503616333, "correct_loss_per_token": 1.7846100330352783, "incorrect_loss_per_token": 1.390841007232666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8780535459518433, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -0.8780535459518433, "logits_per_char": -0.43902677297592163, "num_chars": 2}, {"sum_logits": -1.32133150100708, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.32133150100708, "logits_per_char": -0.66066575050354, "num_chars": 2}, {"sum_logits": -1.9731379747390747, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.9731379747390747, "logits_per_char": -0.9865689873695374, "num_chars": 2}, {"sum_logits": -1.7846100330352783, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.7846100330352783, "logits_per_char": -0.8923050165176392, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 286, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5728917121887207, "incorrect_loss_raw": 1.3651076952616374, "correct_loss_per_char": 0.7864458560943604, "incorrect_loss_per_char": 0.6825538476308187, "correct_loss_per_token": 1.5728917121887207, "incorrect_loss_per_token": 1.3651076952616374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2014522552490234, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -1.2014522552490234, "logits_per_char": -0.6007261276245117, "num_chars": 2}, {"sum_logits": -1.2175935506820679, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.2175935506820679, "logits_per_char": -0.6087967753410339, "num_chars": 2}, {"sum_logits": -1.6762772798538208, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6762772798538208, "logits_per_char": -0.8381386399269104, "num_chars": 2}, {"sum_logits": -1.5728917121887207, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.5728917121887207, "logits_per_char": -0.7864458560943604, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 287, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2739660739898682, "incorrect_loss_raw": 1.4526771704355876, "correct_loss_per_char": 0.6369830369949341, "incorrect_loss_per_char": 0.7263385852177938, "correct_loss_per_token": 1.2739660739898682, "incorrect_loss_per_token": 1.4526771704355876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2119287252426147, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -1.2119287252426147, "logits_per_char": -0.6059643626213074, "num_chars": 2}, {"sum_logits": -1.2739660739898682, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.2739660739898682, "logits_per_char": -0.6369830369949341, "num_chars": 2}, {"sum_logits": -1.6335102319717407, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.6335102319717407, "logits_per_char": -0.8167551159858704, "num_chars": 2}, {"sum_logits": -1.5125925540924072, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.5125925540924072, "logits_per_char": -0.7562962770462036, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 288, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9182491302490234, "incorrect_loss_raw": 1.290853500366211, "correct_loss_per_char": 0.9591245651245117, "incorrect_loss_per_char": 0.6454267501831055, "correct_loss_per_token": 1.9182491302490234, "incorrect_loss_per_token": 1.290853500366211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0273752212524414, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.0273752212524414, "logits_per_char": -0.5136876106262207, "num_chars": 2}, {"sum_logits": -1.2788267135620117, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2788267135620117, "logits_per_char": -0.6394133567810059, "num_chars": 2}, {"sum_logits": -1.9182491302490234, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.9182491302490234, "logits_per_char": -0.9591245651245117, "num_chars": 2}, {"sum_logits": -1.5663585662841797, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5663585662841797, "logits_per_char": -0.7831792831420898, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 289, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8976768255233765, "incorrect_loss_raw": 1.293425480524699, "correct_loss_per_char": 0.9488384127616882, "incorrect_loss_per_char": 0.6467127402623495, "correct_loss_per_token": 1.8976768255233765, "incorrect_loss_per_token": 1.293425480524699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0893621444702148, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.0893621444702148, "logits_per_char": -0.5446810722351074, "num_chars": 2}, {"sum_logits": -1.1995183229446411, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.1995183229446411, "logits_per_char": -0.5997591614723206, "num_chars": 2}, {"sum_logits": -1.8976768255233765, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.8976768255233765, "logits_per_char": -0.9488384127616882, "num_chars": 2}, {"sum_logits": -1.5913959741592407, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.5913959741592407, "logits_per_char": -0.7956979870796204, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 290, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.170715570449829, "incorrect_loss_raw": 1.5602158308029175, "correct_loss_per_char": 0.5853577852249146, "incorrect_loss_per_char": 0.7801079154014587, "correct_loss_per_token": 1.170715570449829, "incorrect_loss_per_token": 1.5602158308029175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0289453268051147, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.0289453268051147, "logits_per_char": -0.5144726634025574, "num_chars": 2}, {"sum_logits": -1.170715570449829, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.170715570449829, "logits_per_char": -0.5853577852249146, "num_chars": 2}, {"sum_logits": -1.9322994947433472, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.9322994947433472, "logits_per_char": -0.9661497473716736, "num_chars": 2}, {"sum_logits": -1.7194026708602905, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.7194026708602905, "logits_per_char": -0.8597013354301453, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 291, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1236915588378906, "incorrect_loss_raw": 1.5406853755315144, "correct_loss_per_char": 0.5618457794189453, "incorrect_loss_per_char": 0.7703426877657572, "correct_loss_per_token": 1.1236915588378906, "incorrect_loss_per_token": 1.5406853755315144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1236915588378906, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.1236915588378906, "logits_per_char": -0.5618457794189453, "num_chars": 2}, {"sum_logits": -1.2110955715179443, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2110955715179443, "logits_per_char": -0.6055477857589722, "num_chars": 2}, {"sum_logits": -1.8846789598464966, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.8846789598464966, "logits_per_char": -0.9423394799232483, "num_chars": 2}, {"sum_logits": -1.5262815952301025, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.5262815952301025, "logits_per_char": -0.7631407976150513, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 292, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3071329593658447, "incorrect_loss_raw": 1.4389019807179768, "correct_loss_per_char": 0.6535664796829224, "incorrect_loss_per_char": 0.7194509903589884, "correct_loss_per_token": 1.3071329593658447, "incorrect_loss_per_token": 1.4389019807179768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2311886548995972, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -1.2311886548995972, "logits_per_char": -0.6155943274497986, "num_chars": 2}, {"sum_logits": -1.3071329593658447, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.3071329593658447, "logits_per_char": -0.6535664796829224, "num_chars": 2}, {"sum_logits": -1.6123460531234741, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6123460531234741, "logits_per_char": -0.8061730265617371, "num_chars": 2}, {"sum_logits": -1.4731712341308594, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.4731712341308594, "logits_per_char": -0.7365856170654297, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 293, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1460312604904175, "incorrect_loss_raw": 1.516185959180196, "correct_loss_per_char": 0.5730156302452087, "incorrect_loss_per_char": 0.758092979590098, "correct_loss_per_token": 1.1460312604904175, "incorrect_loss_per_token": 1.516185959180196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1460312604904175, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.1460312604904175, "logits_per_char": -0.5730156302452087, "num_chars": 2}, {"sum_logits": -1.2577272653579712, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.2577272653579712, "logits_per_char": -0.6288636326789856, "num_chars": 2}, {"sum_logits": -1.7606972455978394, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7606972455978394, "logits_per_char": -0.8803486227989197, "num_chars": 2}, {"sum_logits": -1.5301333665847778, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.5301333665847778, "logits_per_char": -0.7650666832923889, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 294, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.758851408958435, "incorrect_loss_raw": 1.308935244878133, "correct_loss_per_char": 0.8794257044792175, "incorrect_loss_per_char": 0.6544676224390665, "correct_loss_per_token": 1.758851408958435, "incorrect_loss_per_token": 1.308935244878133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2297018766403198, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.2297018766403198, "logits_per_char": -0.6148509383201599, "num_chars": 2}, {"sum_logits": -1.1631287336349487, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -1.1631287336349487, "logits_per_char": -0.5815643668174744, "num_chars": 2}, {"sum_logits": -1.758851408958435, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.758851408958435, "logits_per_char": -0.8794257044792175, "num_chars": 2}, {"sum_logits": -1.5339751243591309, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.5339751243591309, "logits_per_char": -0.7669875621795654, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 295, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4472408294677734, "incorrect_loss_raw": 1.4077984889348347, "correct_loss_per_char": 0.7236204147338867, "incorrect_loss_per_char": 0.7038992444674174, "correct_loss_per_token": 1.4472408294677734, "incorrect_loss_per_token": 1.4077984889348347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0738829374313354, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -1.0738829374313354, "logits_per_char": -0.5369414687156677, "num_chars": 2}, {"sum_logits": -1.4472408294677734, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.4472408294677734, "logits_per_char": -0.7236204147338867, "num_chars": 2}, {"sum_logits": -1.6764390468597412, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6764390468597412, "logits_per_char": -0.8382195234298706, "num_chars": 2}, {"sum_logits": -1.4730734825134277, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.4730734825134277, "logits_per_char": -0.7365367412567139, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 296, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.622257113456726, "incorrect_loss_raw": 1.418021321296692, "correct_loss_per_char": 0.811128556728363, "incorrect_loss_per_char": 0.709010660648346, "correct_loss_per_token": 1.622257113456726, "incorrect_loss_per_token": 1.418021321296692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0937306880950928, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.0937306880950928, "logits_per_char": -0.5468653440475464, "num_chars": 2}, {"sum_logits": -1.0967389345169067, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.0967389345169067, "logits_per_char": -0.5483694672584534, "num_chars": 2}, {"sum_logits": -2.063594341278076, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -2.063594341278076, "logits_per_char": -1.031797170639038, "num_chars": 2}, {"sum_logits": -1.622257113456726, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.622257113456726, "logits_per_char": -0.811128556728363, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 297, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.343346357345581, "incorrect_loss_raw": 1.4372952779134114, "correct_loss_per_char": 0.6716731786727905, "incorrect_loss_per_char": 0.7186476389567057, "correct_loss_per_token": 1.343346357345581, "incorrect_loss_per_token": 1.4372952779134114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.129944086074829, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.129944086074829, "logits_per_char": -0.5649720430374146, "num_chars": 2}, {"sum_logits": -1.343346357345581, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.343346357345581, "logits_per_char": -0.6716731786727905, "num_chars": 2}, {"sum_logits": -1.6582980155944824, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.6582980155944824, "logits_per_char": -0.8291490077972412, "num_chars": 2}, {"sum_logits": -1.5236437320709229, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5236437320709229, "logits_per_char": -0.7618218660354614, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 298, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391625165939331, "incorrect_loss_raw": 1.4076695044835408, "correct_loss_per_char": 0.6958125829696655, "incorrect_loss_per_char": 0.7038347522417704, "correct_loss_per_token": 1.391625165939331, "incorrect_loss_per_token": 1.4076695044835408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.205650806427002, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.205650806427002, "logits_per_char": -0.602825403213501, "num_chars": 2}, {"sum_logits": -1.391625165939331, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.391625165939331, "logits_per_char": -0.6958125829696655, "num_chars": 2}, {"sum_logits": -1.5978479385375977, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5978479385375977, "logits_per_char": -0.7989239692687988, "num_chars": 2}, {"sum_logits": -1.419509768486023, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.419509768486023, "logits_per_char": -0.7097548842430115, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 299, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9436779022216797, "incorrect_loss_raw": 1.3889574805895488, "correct_loss_per_char": 0.9718389511108398, "incorrect_loss_per_char": 0.6944787402947744, "correct_loss_per_token": 1.9436779022216797, "incorrect_loss_per_token": 1.3889574805895488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.893133819103241, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": true, "logits_per_token": -0.893133819103241, "logits_per_char": -0.4465669095516205, "num_chars": 2}, {"sum_logits": -1.1373474597930908, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.1373474597930908, "logits_per_char": -0.5686737298965454, "num_chars": 2}, {"sum_logits": -2.1363911628723145, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -2.1363911628723145, "logits_per_char": -1.0681955814361572, "num_chars": 2}, {"sum_logits": -1.9436779022216797, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.9436779022216797, "logits_per_char": -0.9718389511108398, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 300, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8000471591949463, "incorrect_loss_raw": 1.306703249613444, "correct_loss_per_char": 0.9000235795974731, "incorrect_loss_per_char": 0.653351624806722, "correct_loss_per_token": 1.8000471591949463, "incorrect_loss_per_token": 1.306703249613444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1228017807006836, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.1228017807006836, "logits_per_char": -0.5614008903503418, "num_chars": 2}, {"sum_logits": -1.2397007942199707, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2397007942199707, "logits_per_char": -0.6198503971099854, "num_chars": 2}, {"sum_logits": -1.8000471591949463, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.8000471591949463, "logits_per_char": -0.9000235795974731, "num_chars": 2}, {"sum_logits": -1.5576071739196777, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.5576071739196777, "logits_per_char": -0.7788035869598389, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 301, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.363532304763794, "incorrect_loss_raw": 1.419210950533549, "correct_loss_per_char": 0.681766152381897, "incorrect_loss_per_char": 0.7096054752667745, "correct_loss_per_token": 1.363532304763794, "incorrect_loss_per_token": 1.419210950533549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.363532304763794, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.363532304763794, "logits_per_char": -0.681766152381897, "num_chars": 2}, {"sum_logits": -1.2206898927688599, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -1.2206898927688599, "logits_per_char": -0.6103449463844299, "num_chars": 2}, {"sum_logits": -1.6468538045883179, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.6468538045883179, "logits_per_char": -0.8234269022941589, "num_chars": 2}, {"sum_logits": -1.3900891542434692, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.3900891542434692, "logits_per_char": -0.6950445771217346, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 302, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0447804927825928, "incorrect_loss_raw": 1.2796708345413208, "correct_loss_per_char": 1.0223902463912964, "incorrect_loss_per_char": 0.6398354172706604, "correct_loss_per_token": 2.0447804927825928, "incorrect_loss_per_token": 1.2796708345413208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0492116212844849, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0492116212844849, "logits_per_char": -0.5246058106422424, "num_chars": 2}, {"sum_logits": -1.1355860233306885, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1355860233306885, "logits_per_char": -0.5677930116653442, "num_chars": 2}, {"sum_logits": -2.0447804927825928, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -2.0447804927825928, "logits_per_char": -1.0223902463912964, "num_chars": 2}, {"sum_logits": -1.654214859008789, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.654214859008789, "logits_per_char": -0.8271074295043945, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 303, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1303303241729736, "incorrect_loss_raw": 1.5834858417510986, "correct_loss_per_char": 0.5651651620864868, "incorrect_loss_per_char": 0.7917429208755493, "correct_loss_per_token": 1.1303303241729736, "incorrect_loss_per_token": 1.5834858417510986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0588245391845703, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.0588245391845703, "logits_per_char": -0.5294122695922852, "num_chars": 2}, {"sum_logits": -1.1303303241729736, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.1303303241729736, "logits_per_char": -0.5651651620864868, "num_chars": 2}, {"sum_logits": -1.9786489009857178, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.9786489009857178, "logits_per_char": -0.9893244504928589, "num_chars": 2}, {"sum_logits": -1.7129840850830078, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.7129840850830078, "logits_per_char": -0.8564920425415039, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 304, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2113137245178223, "incorrect_loss_raw": 1.5291386445363362, "correct_loss_per_char": 0.6056568622589111, "incorrect_loss_per_char": 0.7645693222681681, "correct_loss_per_token": 1.2113137245178223, "incorrect_loss_per_token": 1.5291386445363362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0691187381744385, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0691187381744385, "logits_per_char": -0.5345593690872192, "num_chars": 2}, {"sum_logits": -1.2113137245178223, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2113137245178223, "logits_per_char": -0.6056568622589111, "num_chars": 2}, {"sum_logits": -1.9565280675888062, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.9565280675888062, "logits_per_char": -0.9782640337944031, "num_chars": 2}, {"sum_logits": -1.5617691278457642, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5617691278457642, "logits_per_char": -0.7808845639228821, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 305, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4986143112182617, "incorrect_loss_raw": 1.4524527390797932, "correct_loss_per_char": 0.7493071556091309, "incorrect_loss_per_char": 0.7262263695398966, "correct_loss_per_token": 1.4986143112182617, "incorrect_loss_per_token": 1.4524527390797932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0683962106704712, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0683962106704712, "logits_per_char": -0.5341981053352356, "num_chars": 2}, {"sum_logits": -1.201309323310852, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.201309323310852, "logits_per_char": -0.600654661655426, "num_chars": 2}, {"sum_logits": -2.0876526832580566, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -2.0876526832580566, "logits_per_char": -1.0438263416290283, "num_chars": 2}, {"sum_logits": -1.4986143112182617, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.4986143112182617, "logits_per_char": -0.7493071556091309, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 306, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4168891906738281, "incorrect_loss_raw": 1.412198265393575, "correct_loss_per_char": 0.7084445953369141, "incorrect_loss_per_char": 0.7060991326967875, "correct_loss_per_token": 1.4168891906738281, "incorrect_loss_per_token": 1.412198265393575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1365635395050049, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.1365635395050049, "logits_per_char": -0.5682817697525024, "num_chars": 2}, {"sum_logits": -1.4168891906738281, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4168891906738281, "logits_per_char": -0.7084445953369141, "num_chars": 2}, {"sum_logits": -1.712951898574829, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.712951898574829, "logits_per_char": -0.8564759492874146, "num_chars": 2}, {"sum_logits": -1.3870793581008911, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.3870793581008911, "logits_per_char": -0.6935396790504456, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 307, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2393091917037964, "incorrect_loss_raw": 1.5087230205535889, "correct_loss_per_char": 0.6196545958518982, "incorrect_loss_per_char": 0.7543615102767944, "correct_loss_per_token": 1.2393091917037964, "incorrect_loss_per_token": 1.5087230205535889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0610898733139038, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.0610898733139038, "logits_per_char": -0.5305449366569519, "num_chars": 2}, {"sum_logits": -1.2393091917037964, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.2393091917037964, "logits_per_char": -0.6196545958518982, "num_chars": 2}, {"sum_logits": -1.7541546821594238, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.7541546821594238, "logits_per_char": -0.8770773410797119, "num_chars": 2}, {"sum_logits": -1.710924506187439, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.710924506187439, "logits_per_char": -0.8554622530937195, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 308, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2178583145141602, "incorrect_loss_raw": 1.506142218907674, "correct_loss_per_char": 0.6089291572570801, "incorrect_loss_per_char": 0.753071109453837, "correct_loss_per_token": 1.2178583145141602, "incorrect_loss_per_token": 1.506142218907674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1226990222930908, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.1226990222930908, "logits_per_char": -0.5613495111465454, "num_chars": 2}, {"sum_logits": -1.2178583145141602, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2178583145141602, "logits_per_char": -0.6089291572570801, "num_chars": 2}, {"sum_logits": -1.829282522201538, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.829282522201538, "logits_per_char": -0.914641261100769, "num_chars": 2}, {"sum_logits": -1.5664451122283936, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.5664451122283936, "logits_per_char": -0.7832225561141968, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 309, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5740478038787842, "incorrect_loss_raw": 1.3917354345321655, "correct_loss_per_char": 0.7870239019393921, "incorrect_loss_per_char": 0.6958677172660828, "correct_loss_per_token": 1.5740478038787842, "incorrect_loss_per_token": 1.3917354345321655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0853760242462158, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0853760242462158, "logits_per_char": -0.5426880121231079, "num_chars": 2}, {"sum_logits": -1.242145299911499, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.242145299911499, "logits_per_char": -0.6210726499557495, "num_chars": 2}, {"sum_logits": -1.8476849794387817, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.8476849794387817, "logits_per_char": -0.9238424897193909, "num_chars": 2}, {"sum_logits": -1.5740478038787842, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5740478038787842, "logits_per_char": -0.7870239019393921, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 310, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0794039964675903, "incorrect_loss_raw": 1.554664969444275, "correct_loss_per_char": 0.5397019982337952, "incorrect_loss_per_char": 0.7773324847221375, "correct_loss_per_token": 1.0794039964675903, "incorrect_loss_per_token": 1.554664969444275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2544742822647095, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.2544742822647095, "logits_per_char": -0.6272371411323547, "num_chars": 2}, {"sum_logits": -1.0794039964675903, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.0794039964675903, "logits_per_char": -0.5397019982337952, "num_chars": 2}, {"sum_logits": -1.809023380279541, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.809023380279541, "logits_per_char": -0.9045116901397705, "num_chars": 2}, {"sum_logits": -1.6004972457885742, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.6004972457885742, "logits_per_char": -0.8002486228942871, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 311, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0869786739349365, "incorrect_loss_raw": 1.5503766934076946, "correct_loss_per_char": 0.5434893369674683, "incorrect_loss_per_char": 0.7751883467038473, "correct_loss_per_token": 1.0869786739349365, "incorrect_loss_per_token": 1.5503766934076946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0869786739349365, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.0869786739349365, "logits_per_char": -0.5434893369674683, "num_chars": 2}, {"sum_logits": -1.247807264328003, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.247807264328003, "logits_per_char": -0.6239036321640015, "num_chars": 2}, {"sum_logits": -1.8348206281661987, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.8348206281661987, "logits_per_char": -0.9174103140830994, "num_chars": 2}, {"sum_logits": -1.5685021877288818, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.5685021877288818, "logits_per_char": -0.7842510938644409, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 312, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4889776706695557, "incorrect_loss_raw": 1.4232668081919353, "correct_loss_per_char": 0.7444888353347778, "incorrect_loss_per_char": 0.7116334040959676, "correct_loss_per_token": 1.4889776706695557, "incorrect_loss_per_token": 1.4232668081919353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.142374038696289, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.142374038696289, "logits_per_char": -0.5711870193481445, "num_chars": 2}, {"sum_logits": -1.2015256881713867, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.2015256881713867, "logits_per_char": -0.6007628440856934, "num_chars": 2}, {"sum_logits": -1.9259006977081299, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.9259006977081299, "logits_per_char": -0.9629503488540649, "num_chars": 2}, {"sum_logits": -1.4889776706695557, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.4889776706695557, "logits_per_char": -0.7444888353347778, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 313, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3158007860183716, "incorrect_loss_raw": 1.4450854063034058, "correct_loss_per_char": 0.6579003930091858, "incorrect_loss_per_char": 0.7225427031517029, "correct_loss_per_token": 1.3158007860183716, "incorrect_loss_per_token": 1.4450854063034058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3158007860183716, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.3158007860183716, "logits_per_char": -0.6579003930091858, "num_chars": 2}, {"sum_logits": -1.1793121099472046, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -1.1793121099472046, "logits_per_char": -0.5896560549736023, "num_chars": 2}, {"sum_logits": -1.7267283201217651, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.7267283201217651, "logits_per_char": -0.8633641600608826, "num_chars": 2}, {"sum_logits": -1.4292157888412476, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.4292157888412476, "logits_per_char": -0.7146078944206238, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 314, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6198036670684814, "incorrect_loss_raw": 1.3853910366694133, "correct_loss_per_char": 0.8099018335342407, "incorrect_loss_per_char": 0.6926955183347067, "correct_loss_per_token": 1.6198036670684814, "incorrect_loss_per_token": 1.3853910366694133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1736146211624146, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.1736146211624146, "logits_per_char": -0.5868073105812073, "num_chars": 2}, {"sum_logits": -1.108799695968628, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.108799695968628, "logits_per_char": -0.554399847984314, "num_chars": 2}, {"sum_logits": -1.8737587928771973, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.8737587928771973, "logits_per_char": -0.9368793964385986, "num_chars": 2}, {"sum_logits": -1.6198036670684814, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.6198036670684814, "logits_per_char": -0.8099018335342407, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 315, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3941864967346191, "incorrect_loss_raw": 1.4179371198018391, "correct_loss_per_char": 0.6970932483673096, "incorrect_loss_per_char": 0.7089685599009196, "correct_loss_per_token": 1.3941864967346191, "incorrect_loss_per_token": 1.4179371198018391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1477373838424683, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.1477373838424683, "logits_per_char": -0.5738686919212341, "num_chars": 2}, {"sum_logits": -1.3934228420257568, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.3934228420257568, "logits_per_char": -0.6967114210128784, "num_chars": 2}, {"sum_logits": -1.7126511335372925, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.7126511335372925, "logits_per_char": -0.8563255667686462, "num_chars": 2}, {"sum_logits": -1.3941864967346191, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.3941864967346191, "logits_per_char": -0.6970932483673096, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 316, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.199201226234436, "incorrect_loss_raw": 1.5366235574086506, "correct_loss_per_char": 0.599600613117218, "incorrect_loss_per_char": 0.7683117787043253, "correct_loss_per_token": 1.199201226234436, "incorrect_loss_per_token": 1.5366235574086506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1113604307174683, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.1113604307174683, "logits_per_char": -0.5556802153587341, "num_chars": 2}, {"sum_logits": -1.199201226234436, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.199201226234436, "logits_per_char": -0.599600613117218, "num_chars": 2}, {"sum_logits": -2.0365138053894043, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -2.0365138053894043, "logits_per_char": -1.0182569026947021, "num_chars": 2}, {"sum_logits": -1.4619964361190796, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.4619964361190796, "logits_per_char": -0.7309982180595398, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 317, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1881698369979858, "incorrect_loss_raw": 1.5887470245361328, "correct_loss_per_char": 0.5940849184989929, "incorrect_loss_per_char": 0.7943735122680664, "correct_loss_per_token": 1.1881698369979858, "incorrect_loss_per_token": 1.5887470245361328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9461818933486938, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.9461818933486938, "logits_per_char": -0.4730909466743469, "num_chars": 2}, {"sum_logits": -1.1881698369979858, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.1881698369979858, "logits_per_char": -0.5940849184989929, "num_chars": 2}, {"sum_logits": -2.009800434112549, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -2.009800434112549, "logits_per_char": -1.0049002170562744, "num_chars": 2}, {"sum_logits": -1.8102587461471558, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8102587461471558, "logits_per_char": -0.9051293730735779, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 318, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.069446086883545, "incorrect_loss_raw": 1.541019598642985, "correct_loss_per_char": 0.5347230434417725, "incorrect_loss_per_char": 0.7705097993214926, "correct_loss_per_token": 1.069446086883545, "incorrect_loss_per_token": 1.541019598642985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.069446086883545, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.069446086883545, "logits_per_char": -0.5347230434417725, "num_chars": 2}, {"sum_logits": -1.33418607711792, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.33418607711792, "logits_per_char": -0.66709303855896, "num_chars": 2}, {"sum_logits": -1.659173846244812, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.659173846244812, "logits_per_char": -0.829586923122406, "num_chars": 2}, {"sum_logits": -1.6296988725662231, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.6296988725662231, "logits_per_char": -0.8148494362831116, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 319, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.002868175506592, "incorrect_loss_raw": 1.2996110518773396, "correct_loss_per_char": 1.001434087753296, "incorrect_loss_per_char": 0.6498055259386698, "correct_loss_per_token": 2.002868175506592, "incorrect_loss_per_token": 1.2996110518773396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9847265481948853, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -0.9847265481948853, "logits_per_char": -0.4923632740974426, "num_chars": 2}, {"sum_logits": -1.2144622802734375, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.2144622802734375, "logits_per_char": -0.6072311401367188, "num_chars": 2}, {"sum_logits": -2.002868175506592, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -2.002868175506592, "logits_per_char": -1.001434087753296, "num_chars": 2}, {"sum_logits": -1.6996443271636963, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.6996443271636963, "logits_per_char": -0.8498221635818481, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 320, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5155768394470215, "incorrect_loss_raw": 1.3916856050491333, "correct_loss_per_char": 0.7577884197235107, "incorrect_loss_per_char": 0.6958428025245667, "correct_loss_per_token": 1.5155768394470215, "incorrect_loss_per_token": 1.3916856050491333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2281928062438965, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.2281928062438965, "logits_per_char": -0.6140964031219482, "num_chars": 2}, {"sum_logits": -1.164239525794983, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.164239525794983, "logits_per_char": -0.5821197628974915, "num_chars": 2}, {"sum_logits": -1.7826244831085205, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.7826244831085205, "logits_per_char": -0.8913122415542603, "num_chars": 2}, {"sum_logits": -1.5155768394470215, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.5155768394470215, "logits_per_char": -0.7577884197235107, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 321, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1786491870880127, "incorrect_loss_raw": 1.4808268944422405, "correct_loss_per_char": 0.5893245935440063, "incorrect_loss_per_char": 0.7404134472211202, "correct_loss_per_token": 1.1786491870880127, "incorrect_loss_per_token": 1.4808268944422405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1786491870880127, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.1786491870880127, "logits_per_char": -0.5893245935440063, "num_chars": 2}, {"sum_logits": -1.3814750909805298, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3814750909805298, "logits_per_char": -0.6907375454902649, "num_chars": 2}, {"sum_logits": -1.6032109260559082, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6032109260559082, "logits_per_char": -0.8016054630279541, "num_chars": 2}, {"sum_logits": -1.4577946662902832, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4577946662902832, "logits_per_char": -0.7288973331451416, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 322, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1531405448913574, "incorrect_loss_raw": 1.5386017163594563, "correct_loss_per_char": 0.5765702724456787, "incorrect_loss_per_char": 0.7693008581797282, "correct_loss_per_token": 1.1531405448913574, "incorrect_loss_per_token": 1.5386017163594563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1419532299041748, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.1419532299041748, "logits_per_char": -0.5709766149520874, "num_chars": 2}, {"sum_logits": -1.1531405448913574, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.1531405448913574, "logits_per_char": -0.5765702724456787, "num_chars": 2}, {"sum_logits": -1.8618686199188232, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.8618686199188232, "logits_per_char": -0.9309343099594116, "num_chars": 2}, {"sum_logits": -1.611983299255371, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.611983299255371, "logits_per_char": -0.8059916496276855, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 323, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7710891962051392, "incorrect_loss_raw": 1.3486875693003337, "correct_loss_per_char": 0.8855445981025696, "incorrect_loss_per_char": 0.6743437846501669, "correct_loss_per_token": 1.7710891962051392, "incorrect_loss_per_token": 1.3486875693003337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0080997943878174, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -1.0080997943878174, "logits_per_char": -0.5040498971939087, "num_chars": 2}, {"sum_logits": -1.2362143993377686, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.2362143993377686, "logits_per_char": -0.6181071996688843, "num_chars": 2}, {"sum_logits": -1.801748514175415, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.801748514175415, "logits_per_char": -0.9008742570877075, "num_chars": 2}, {"sum_logits": -1.7710891962051392, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.7710891962051392, "logits_per_char": -0.8855445981025696, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 324, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3178893327713013, "incorrect_loss_raw": 1.492325226465861, "correct_loss_per_char": 0.6589446663856506, "incorrect_loss_per_char": 0.7461626132329305, "correct_loss_per_token": 1.3178893327713013, "incorrect_loss_per_token": 1.492325226465861, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9917398691177368, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -0.9917398691177368, "logits_per_char": -0.4958699345588684, "num_chars": 2}, {"sum_logits": -1.3178893327713013, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.3178893327713013, "logits_per_char": -0.6589446663856506, "num_chars": 2}, {"sum_logits": -1.8380597829818726, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.8380597829818726, "logits_per_char": -0.9190298914909363, "num_chars": 2}, {"sum_logits": -1.6471760272979736, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.6471760272979736, "logits_per_char": -0.8235880136489868, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 325, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.980843186378479, "incorrect_loss_raw": 1.654797116915385, "correct_loss_per_char": 0.4904215931892395, "incorrect_loss_per_char": 0.8273985584576925, "correct_loss_per_token": 0.980843186378479, "incorrect_loss_per_token": 1.654797116915385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.980843186378479, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": true, "logits_per_token": -0.980843186378479, "logits_per_char": -0.4904215931892395, "num_chars": 2}, {"sum_logits": -1.1487492322921753, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.1487492322921753, "logits_per_char": -0.5743746161460876, "num_chars": 2}, {"sum_logits": -2.0384836196899414, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -2.0384836196899414, "logits_per_char": -1.0192418098449707, "num_chars": 2}, {"sum_logits": -1.777158498764038, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.777158498764038, "logits_per_char": -0.888579249382019, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 326, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7363169193267822, "incorrect_loss_raw": 1.3592998186747234, "correct_loss_per_char": 0.8681584596633911, "incorrect_loss_per_char": 0.6796499093373617, "correct_loss_per_token": 1.7363169193267822, "incorrect_loss_per_token": 1.3592998186747234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0219266414642334, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.0219266414642334, "logits_per_char": -0.5109633207321167, "num_chars": 2}, {"sum_logits": -1.2171564102172852, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2171564102172852, "logits_per_char": -0.6085782051086426, "num_chars": 2}, {"sum_logits": -1.8388164043426514, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.8388164043426514, "logits_per_char": -0.9194082021713257, "num_chars": 2}, {"sum_logits": -1.7363169193267822, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.7363169193267822, "logits_per_char": -0.8681584596633911, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 327, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.957958459854126, "incorrect_loss_raw": 1.2889340321222942, "correct_loss_per_char": 0.978979229927063, "incorrect_loss_per_char": 0.6444670160611471, "correct_loss_per_token": 1.957958459854126, "incorrect_loss_per_token": 1.2889340321222942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0440641641616821, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0440641641616821, "logits_per_char": -0.5220320820808411, "num_chars": 2}, {"sum_logits": -1.211759090423584, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.211759090423584, "logits_per_char": -0.605879545211792, "num_chars": 2}, {"sum_logits": -1.957958459854126, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.957958459854126, "logits_per_char": -0.978979229927063, "num_chars": 2}, {"sum_logits": -1.6109788417816162, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6109788417816162, "logits_per_char": -0.8054894208908081, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 328, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9799271821975708, "incorrect_loss_raw": 1.6538522640864055, "correct_loss_per_char": 0.4899635910987854, "incorrect_loss_per_char": 0.8269261320432028, "correct_loss_per_token": 0.9799271821975708, "incorrect_loss_per_token": 1.6538522640864055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9799271821975708, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": true, "logits_per_token": -0.9799271821975708, "logits_per_char": -0.4899635910987854, "num_chars": 2}, {"sum_logits": -1.20344877243042, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.20344877243042, "logits_per_char": -0.60172438621521, "num_chars": 2}, {"sum_logits": -2.1182026863098145, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -2.1182026863098145, "logits_per_char": -1.0591013431549072, "num_chars": 2}, {"sum_logits": -1.639905333518982, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.639905333518982, "logits_per_char": -0.819952666759491, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 329, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5016708374023438, "incorrect_loss_raw": 1.3814178705215454, "correct_loss_per_char": 0.7508354187011719, "incorrect_loss_per_char": 0.6907089352607727, "correct_loss_per_token": 1.5016708374023438, "incorrect_loss_per_token": 1.3814178705215454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2449307441711426, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2449307441711426, "logits_per_char": -0.6224653720855713, "num_chars": 2}, {"sum_logits": -1.2273118495941162, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.2273118495941162, "logits_per_char": -0.6136559247970581, "num_chars": 2}, {"sum_logits": -1.6720110177993774, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.6720110177993774, "logits_per_char": -0.8360055088996887, "num_chars": 2}, {"sum_logits": -1.5016708374023438, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.5016708374023438, "logits_per_char": -0.7508354187011719, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 330, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2611045837402344, "incorrect_loss_raw": 1.4555633862813313, "correct_loss_per_char": 0.6305522918701172, "incorrect_loss_per_char": 0.7277816931406657, "correct_loss_per_token": 1.2611045837402344, "incorrect_loss_per_token": 1.4555633862813313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2611045837402344, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -1.2611045837402344, "logits_per_char": -0.6305522918701172, "num_chars": 2}, {"sum_logits": -1.298949956893921, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.298949956893921, "logits_per_char": -0.6494749784469604, "num_chars": 2}, {"sum_logits": -1.6992510557174683, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6992510557174683, "logits_per_char": -0.8496255278587341, "num_chars": 2}, {"sum_logits": -1.368489146232605, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.368489146232605, "logits_per_char": -0.6842445731163025, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 331, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410423755645752, "incorrect_loss_raw": 1.433038592338562, "correct_loss_per_char": 0.705211877822876, "incorrect_loss_per_char": 0.716519296169281, "correct_loss_per_token": 1.410423755645752, "incorrect_loss_per_token": 1.433038592338562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0981464385986328, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.0981464385986328, "logits_per_char": -0.5490732192993164, "num_chars": 2}, {"sum_logits": -1.351276159286499, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.351276159286499, "logits_per_char": -0.6756380796432495, "num_chars": 2}, {"sum_logits": -1.8496931791305542, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.8496931791305542, "logits_per_char": -0.9248465895652771, "num_chars": 2}, {"sum_logits": -1.410423755645752, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.410423755645752, "logits_per_char": -0.705211877822876, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 332, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2031335830688477, "incorrect_loss_raw": 1.5065793593724568, "correct_loss_per_char": 0.6015667915344238, "incorrect_loss_per_char": 0.7532896796862284, "correct_loss_per_token": 1.2031335830688477, "incorrect_loss_per_token": 1.5065793593724568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1411265134811401, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1411265134811401, "logits_per_char": -0.5705632567405701, "num_chars": 2}, {"sum_logits": -1.2031335830688477, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2031335830688477, "logits_per_char": -0.6015667915344238, "num_chars": 2}, {"sum_logits": -1.8141568899154663, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.8141568899154663, "logits_per_char": -0.9070784449577332, "num_chars": 2}, {"sum_logits": -1.5644546747207642, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5644546747207642, "logits_per_char": -0.7822273373603821, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 333, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7638784646987915, "incorrect_loss_raw": 1.3138678868611653, "correct_loss_per_char": 0.8819392323493958, "incorrect_loss_per_char": 0.6569339434305826, "correct_loss_per_token": 1.7638784646987915, "incorrect_loss_per_token": 1.3138678868611653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0702043771743774, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.0702043771743774, "logits_per_char": -0.5351021885871887, "num_chars": 2}, {"sum_logits": -1.5516951084136963, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.5516951084136963, "logits_per_char": -0.7758475542068481, "num_chars": 2}, {"sum_logits": -1.7638784646987915, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7638784646987915, "logits_per_char": -0.8819392323493958, "num_chars": 2}, {"sum_logits": -1.3197041749954224, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.3197041749954224, "logits_per_char": -0.6598520874977112, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 334, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1412349939346313, "incorrect_loss_raw": 1.5212119817733765, "correct_loss_per_char": 0.5706174969673157, "incorrect_loss_per_char": 0.7606059908866882, "correct_loss_per_token": 1.1412349939346313, "incorrect_loss_per_token": 1.5212119817733765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1412349939346313, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -1.1412349939346313, "logits_per_char": -0.5706174969673157, "num_chars": 2}, {"sum_logits": -1.2358942031860352, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.2358942031860352, "logits_per_char": -0.6179471015930176, "num_chars": 2}, {"sum_logits": -1.7813019752502441, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.7813019752502441, "logits_per_char": -0.8906509876251221, "num_chars": 2}, {"sum_logits": -1.54643976688385, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.54643976688385, "logits_per_char": -0.773219883441925, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 335, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1950831413269043, "incorrect_loss_raw": 1.510077436765035, "correct_loss_per_char": 0.5975415706634521, "incorrect_loss_per_char": 0.7550387183825175, "correct_loss_per_token": 1.1950831413269043, "incorrect_loss_per_token": 1.510077436765035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.166439414024353, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -1.166439414024353, "logits_per_char": -0.5832197070121765, "num_chars": 2}, {"sum_logits": -1.1950831413269043, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.1950831413269043, "logits_per_char": -0.5975415706634521, "num_chars": 2}, {"sum_logits": -1.8294765949249268, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.8294765949249268, "logits_per_char": -0.9147382974624634, "num_chars": 2}, {"sum_logits": -1.5343163013458252, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.5343163013458252, "logits_per_char": -0.7671581506729126, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 336, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1393699645996094, "incorrect_loss_raw": 1.2745085557301838, "correct_loss_per_char": 1.0696849822998047, "incorrect_loss_per_char": 0.6372542778650919, "correct_loss_per_token": 2.1393699645996094, "incorrect_loss_per_token": 1.2745085557301838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0153162479400635, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -1.0153162479400635, "logits_per_char": -0.5076581239700317, "num_chars": 2}, {"sum_logits": -1.1274003982543945, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.1274003982543945, "logits_per_char": -0.5637001991271973, "num_chars": 2}, {"sum_logits": -2.1393699645996094, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -2.1393699645996094, "logits_per_char": -1.0696849822998047, "num_chars": 2}, {"sum_logits": -1.6808090209960938, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.6808090209960938, "logits_per_char": -0.8404045104980469, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 337, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5521602630615234, "incorrect_loss_raw": 1.390469233194987, "correct_loss_per_char": 0.7760801315307617, "incorrect_loss_per_char": 0.6952346165974935, "correct_loss_per_token": 1.5521602630615234, "incorrect_loss_per_token": 1.390469233194987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0218994617462158, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.0218994617462158, "logits_per_char": -0.5109497308731079, "num_chars": 2}, {"sum_logits": -1.4076513051986694, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.4076513051986694, "logits_per_char": -0.7038256525993347, "num_chars": 2}, {"sum_logits": -1.7418569326400757, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.7418569326400757, "logits_per_char": -0.8709284663200378, "num_chars": 2}, {"sum_logits": -1.5521602630615234, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.5521602630615234, "logits_per_char": -0.7760801315307617, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 338, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.860172986984253, "incorrect_loss_raw": 1.303380290667216, "correct_loss_per_char": 0.9300864934921265, "incorrect_loss_per_char": 0.651690145333608, "correct_loss_per_token": 1.860172986984253, "incorrect_loss_per_token": 1.303380290667216, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0603361129760742, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.0603361129760742, "logits_per_char": -0.5301680564880371, "num_chars": 2}, {"sum_logits": -1.237788438796997, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.237788438796997, "logits_per_char": -0.6188942193984985, "num_chars": 2}, {"sum_logits": -1.860172986984253, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.860172986984253, "logits_per_char": -0.9300864934921265, "num_chars": 2}, {"sum_logits": -1.6120163202285767, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.6120163202285767, "logits_per_char": -0.8060081601142883, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 339, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5572959184646606, "incorrect_loss_raw": 1.3792273998260498, "correct_loss_per_char": 0.7786479592323303, "incorrect_loss_per_char": 0.6896136999130249, "correct_loss_per_token": 1.5572959184646606, "incorrect_loss_per_token": 1.3792273998260498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1151487827301025, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.1151487827301025, "logits_per_char": -0.5575743913650513, "num_chars": 2}, {"sum_logits": -1.2762277126312256, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.2762277126312256, "logits_per_char": -0.6381138563156128, "num_chars": 2}, {"sum_logits": -1.7463057041168213, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.7463057041168213, "logits_per_char": -0.8731528520584106, "num_chars": 2}, {"sum_logits": -1.5572959184646606, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.5572959184646606, "logits_per_char": -0.7786479592323303, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 340, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1722211837768555, "incorrect_loss_raw": 1.5584102471669514, "correct_loss_per_char": 0.5861105918884277, "incorrect_loss_per_char": 0.7792051235834757, "correct_loss_per_token": 1.1722211837768555, "incorrect_loss_per_token": 1.5584102471669514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0455458164215088, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0455458164215088, "logits_per_char": -0.5227729082107544, "num_chars": 2}, {"sum_logits": -1.1722211837768555, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1722211837768555, "logits_per_char": -0.5861105918884277, "num_chars": 2}, {"sum_logits": -1.9780340194702148, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.9780340194702148, "logits_per_char": -0.9890170097351074, "num_chars": 2}, {"sum_logits": -1.6516509056091309, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.6516509056091309, "logits_per_char": -0.8258254528045654, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 341, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1890324354171753, "incorrect_loss_raw": 1.5436579783757527, "correct_loss_per_char": 0.5945162177085876, "incorrect_loss_per_char": 0.7718289891878763, "correct_loss_per_token": 1.1890324354171753, "incorrect_loss_per_token": 1.5436579783757527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0647540092468262, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0647540092468262, "logits_per_char": -0.5323770046234131, "num_chars": 2}, {"sum_logits": -1.1890324354171753, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.1890324354171753, "logits_per_char": -0.5945162177085876, "num_chars": 2}, {"sum_logits": -1.9581042528152466, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9581042528152466, "logits_per_char": -0.9790521264076233, "num_chars": 2}, {"sum_logits": -1.6081156730651855, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.6081156730651855, "logits_per_char": -0.8040578365325928, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 342, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1767406463623047, "incorrect_loss_raw": 1.2856850624084473, "correct_loss_per_char": 1.0883703231811523, "incorrect_loss_per_char": 0.6428425312042236, "correct_loss_per_token": 2.1767406463623047, "incorrect_loss_per_token": 1.2856850624084473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9331815242767334, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.9331815242767334, "logits_per_char": -0.4665907621383667, "num_chars": 2}, {"sum_logits": -1.152172327041626, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.152172327041626, "logits_per_char": -0.576086163520813, "num_chars": 2}, {"sum_logits": -2.1767406463623047, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -2.1767406463623047, "logits_per_char": -1.0883703231811523, "num_chars": 2}, {"sum_logits": -1.7717013359069824, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.7717013359069824, "logits_per_char": -0.8858506679534912, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 343, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5729538202285767, "incorrect_loss_raw": 1.4189106623331706, "correct_loss_per_char": 0.7864769101142883, "incorrect_loss_per_char": 0.7094553311665853, "correct_loss_per_token": 1.5729538202285767, "incorrect_loss_per_token": 1.4189106623331706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0396020412445068, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.0396020412445068, "logits_per_char": -0.5198010206222534, "num_chars": 2}, {"sum_logits": -1.2122166156768799, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.2122166156768799, "logits_per_char": -0.6061083078384399, "num_chars": 2}, {"sum_logits": -2.004913330078125, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -2.004913330078125, "logits_per_char": -1.0024566650390625, "num_chars": 2}, {"sum_logits": -1.5729538202285767, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.5729538202285767, "logits_per_char": -0.7864769101142883, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 344, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1722323894500732, "incorrect_loss_raw": 1.512963096300761, "correct_loss_per_char": 0.5861161947250366, "incorrect_loss_per_char": 0.7564815481503805, "correct_loss_per_token": 1.1722323894500732, "incorrect_loss_per_token": 1.512963096300761, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1722323894500732, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.1722323894500732, "logits_per_char": -0.5861161947250366, "num_chars": 2}, {"sum_logits": -1.1644141674041748, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -1.1644141674041748, "logits_per_char": -0.5822070837020874, "num_chars": 2}, {"sum_logits": -1.729411005973816, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.729411005973816, "logits_per_char": -0.864705502986908, "num_chars": 2}, {"sum_logits": -1.645064115524292, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.645064115524292, "logits_per_char": -0.822532057762146, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 345, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.491248369216919, "incorrect_loss_raw": 1.42006520430247, "correct_loss_per_char": 0.7456241846084595, "incorrect_loss_per_char": 0.710032602151235, "correct_loss_per_token": 1.491248369216919, "incorrect_loss_per_token": 1.42006520430247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0262749195098877, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.0262749195098877, "logits_per_char": -0.5131374597549438, "num_chars": 2}, {"sum_logits": -1.3757072687149048, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3757072687149048, "logits_per_char": -0.6878536343574524, "num_chars": 2}, {"sum_logits": -1.8582134246826172, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.8582134246826172, "logits_per_char": -0.9291067123413086, "num_chars": 2}, {"sum_logits": -1.491248369216919, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.491248369216919, "logits_per_char": -0.7456241846084595, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 346, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6474353075027466, "incorrect_loss_raw": 1.336700479189555, "correct_loss_per_char": 0.8237176537513733, "incorrect_loss_per_char": 0.6683502395947775, "correct_loss_per_token": 1.6474353075027466, "incorrect_loss_per_token": 1.336700479189555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2886316776275635, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.2886316776275635, "logits_per_char": -0.6443158388137817, "num_chars": 2}, {"sum_logits": -1.1374574899673462, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -1.1374574899673462, "logits_per_char": -0.5687287449836731, "num_chars": 2}, {"sum_logits": -1.6474353075027466, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.6474353075027466, "logits_per_char": -0.8237176537513733, "num_chars": 2}, {"sum_logits": -1.5840122699737549, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.5840122699737549, "logits_per_char": -0.7920061349868774, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 347, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.190758228302002, "incorrect_loss_raw": 1.5366859436035156, "correct_loss_per_char": 0.595379114151001, "incorrect_loss_per_char": 0.7683429718017578, "correct_loss_per_token": 1.190758228302002, "incorrect_loss_per_token": 1.5366859436035156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0766819715499878, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.0766819715499878, "logits_per_char": -0.5383409857749939, "num_chars": 2}, {"sum_logits": -1.190758228302002, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.190758228302002, "logits_per_char": -0.595379114151001, "num_chars": 2}, {"sum_logits": -1.943960189819336, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.943960189819336, "logits_per_char": -0.971980094909668, "num_chars": 2}, {"sum_logits": -1.5894156694412231, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.5894156694412231, "logits_per_char": -0.7947078347206116, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 348, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3896957635879517, "incorrect_loss_raw": 1.413878361384074, "correct_loss_per_char": 0.6948478817939758, "incorrect_loss_per_char": 0.706939180692037, "correct_loss_per_token": 1.3896957635879517, "incorrect_loss_per_token": 1.413878361384074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1671028137207031, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1671028137207031, "logits_per_char": -0.5835514068603516, "num_chars": 2}, {"sum_logits": -1.4113028049468994, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4113028049468994, "logits_per_char": -0.7056514024734497, "num_chars": 2}, {"sum_logits": -1.6632294654846191, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6632294654846191, "logits_per_char": -0.8316147327423096, "num_chars": 2}, {"sum_logits": -1.3896957635879517, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3896957635879517, "logits_per_char": -0.6948478817939758, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 349, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2133538722991943, "incorrect_loss_raw": 1.4932109117507935, "correct_loss_per_char": 0.6066769361495972, "incorrect_loss_per_char": 0.7466054558753967, "correct_loss_per_token": 1.2133538722991943, "incorrect_loss_per_token": 1.4932109117507935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2133538722991943, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.2133538722991943, "logits_per_char": -0.6066769361495972, "num_chars": 2}, {"sum_logits": -1.2466238737106323, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.2466238737106323, "logits_per_char": -0.6233119368553162, "num_chars": 2}, {"sum_logits": -1.8406435251235962, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.8406435251235962, "logits_per_char": -0.9203217625617981, "num_chars": 2}, {"sum_logits": -1.3923653364181519, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.3923653364181519, "logits_per_char": -0.6961826682090759, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 350, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2967891693115234, "incorrect_loss_raw": 1.5536174774169922, "correct_loss_per_char": 0.6483945846557617, "incorrect_loss_per_char": 0.7768087387084961, "correct_loss_per_token": 1.2967891693115234, "incorrect_loss_per_token": 1.5536174774169922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8974528312683105, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.8974528312683105, "logits_per_char": -0.4487264156341553, "num_chars": 2}, {"sum_logits": -1.2967891693115234, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2967891693115234, "logits_per_char": -0.6483945846557617, "num_chars": 2}, {"sum_logits": -2.061084032058716, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -2.061084032058716, "logits_per_char": -1.030542016029358, "num_chars": 2}, {"sum_logits": -1.7023155689239502, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.7023155689239502, "logits_per_char": -0.8511577844619751, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 351, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4788180589675903, "incorrect_loss_raw": 1.4113928476969402, "correct_loss_per_char": 0.7394090294837952, "incorrect_loss_per_char": 0.7056964238484701, "correct_loss_per_token": 1.4788180589675903, "incorrect_loss_per_token": 1.4113928476969402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0259050130844116, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -1.0259050130844116, "logits_per_char": -0.5129525065422058, "num_chars": 2}, {"sum_logits": -1.4788180589675903, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.4788180589675903, "logits_per_char": -0.7394090294837952, "num_chars": 2}, {"sum_logits": -1.747489333152771, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.747489333152771, "logits_per_char": -0.8737446665763855, "num_chars": 2}, {"sum_logits": -1.4607841968536377, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.4607841968536377, "logits_per_char": -0.7303920984268188, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 352, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.225912094116211, "incorrect_loss_raw": 1.5044971307118733, "correct_loss_per_char": 0.6129560470581055, "incorrect_loss_per_char": 0.7522485653559366, "correct_loss_per_token": 1.225912094116211, "incorrect_loss_per_token": 1.5044971307118733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.136336088180542, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.136336088180542, "logits_per_char": -0.568168044090271, "num_chars": 2}, {"sum_logits": -1.225912094116211, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.225912094116211, "logits_per_char": -0.6129560470581055, "num_chars": 2}, {"sum_logits": -1.8730634450912476, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.8730634450912476, "logits_per_char": -0.9365317225456238, "num_chars": 2}, {"sum_logits": -1.5040918588638306, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5040918588638306, "logits_per_char": -0.7520459294319153, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 353, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3791426420211792, "incorrect_loss_raw": 1.4305331707000732, "correct_loss_per_char": 0.6895713210105896, "incorrect_loss_per_char": 0.7152665853500366, "correct_loss_per_token": 1.3791426420211792, "incorrect_loss_per_token": 1.4305331707000732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1173298358917236, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -1.1173298358917236, "logits_per_char": -0.5586649179458618, "num_chars": 2}, {"sum_logits": -1.3791426420211792, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.3791426420211792, "logits_per_char": -0.6895713210105896, "num_chars": 2}, {"sum_logits": -1.7403724193572998, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.7403724193572998, "logits_per_char": -0.8701862096786499, "num_chars": 2}, {"sum_logits": -1.4338972568511963, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.4338972568511963, "logits_per_char": -0.7169486284255981, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 354, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2297872304916382, "incorrect_loss_raw": 1.512668530146281, "correct_loss_per_char": 0.6148936152458191, "incorrect_loss_per_char": 0.7563342650731405, "correct_loss_per_token": 1.2297872304916382, "incorrect_loss_per_token": 1.512668530146281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0709242820739746, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.0709242820739746, "logits_per_char": -0.5354621410369873, "num_chars": 2}, {"sum_logits": -1.2297872304916382, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.2297872304916382, "logits_per_char": -0.6148936152458191, "num_chars": 2}, {"sum_logits": -1.8631255626678467, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.8631255626678467, "logits_per_char": -0.9315627813339233, "num_chars": 2}, {"sum_logits": -1.6039557456970215, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.6039557456970215, "logits_per_char": -0.8019778728485107, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 355, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5405954122543335, "incorrect_loss_raw": 1.3647181987762451, "correct_loss_per_char": 0.7702977061271667, "incorrect_loss_per_char": 0.6823590993881226, "correct_loss_per_token": 1.5405954122543335, "incorrect_loss_per_token": 1.3647181987762451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.297059178352356, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.297059178352356, "logits_per_char": -0.648529589176178, "num_chars": 2}, {"sum_logits": -1.1975167989730835, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -1.1975167989730835, "logits_per_char": -0.5987583994865417, "num_chars": 2}, {"sum_logits": -1.599578619003296, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.599578619003296, "logits_per_char": -0.799789309501648, "num_chars": 2}, {"sum_logits": -1.5405954122543335, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.5405954122543335, "logits_per_char": -0.7702977061271667, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 356, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0906479358673096, "incorrect_loss_raw": 1.3015385468800862, "correct_loss_per_char": 1.0453239679336548, "incorrect_loss_per_char": 0.6507692734400431, "correct_loss_per_token": 2.0906479358673096, "incorrect_loss_per_token": 1.3015385468800862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9197559356689453, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -0.9197559356689453, "logits_per_char": -0.45987796783447266, "num_chars": 2}, {"sum_logits": -1.2152736186981201, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.2152736186981201, "logits_per_char": -0.6076368093490601, "num_chars": 2}, {"sum_logits": -2.0906479358673096, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -2.0906479358673096, "logits_per_char": -1.0453239679336548, "num_chars": 2}, {"sum_logits": -1.7695860862731934, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.7695860862731934, "logits_per_char": -0.8847930431365967, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 357, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1848640441894531, "incorrect_loss_raw": 1.5852203965187073, "correct_loss_per_char": 0.5924320220947266, "incorrect_loss_per_char": 0.7926101982593536, "correct_loss_per_token": 1.1848640441894531, "incorrect_loss_per_token": 1.5852203965187073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9540829062461853, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.9540829062461853, "logits_per_char": -0.47704145312309265, "num_chars": 2}, {"sum_logits": -1.1848640441894531, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.1848640441894531, "logits_per_char": -0.5924320220947266, "num_chars": 2}, {"sum_logits": -2.0274996757507324, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -2.0274996757507324, "logits_per_char": -1.0137498378753662, "num_chars": 2}, {"sum_logits": -1.774078607559204, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.774078607559204, "logits_per_char": -0.887039303779602, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 358, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2517774105072021, "incorrect_loss_raw": 1.510431965192159, "correct_loss_per_char": 0.6258887052536011, "incorrect_loss_per_char": 0.7552159825960795, "correct_loss_per_token": 1.2517774105072021, "incorrect_loss_per_token": 1.510431965192159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.033444881439209, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -1.033444881439209, "logits_per_char": -0.5167224407196045, "num_chars": 2}, {"sum_logits": -1.2517774105072021, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.2517774105072021, "logits_per_char": -0.6258887052536011, "num_chars": 2}, {"sum_logits": -1.863349437713623, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.863349437713623, "logits_per_char": -0.9316747188568115, "num_chars": 2}, {"sum_logits": -1.634501576423645, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.634501576423645, "logits_per_char": -0.8172507882118225, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 359, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1695489883422852, "incorrect_loss_raw": 1.5934009949366252, "correct_loss_per_char": 0.5847744941711426, "incorrect_loss_per_char": 0.7967004974683126, "correct_loss_per_token": 1.1695489883422852, "incorrect_loss_per_token": 1.5934009949366252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9968017339706421, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -0.9968017339706421, "logits_per_char": -0.49840086698532104, "num_chars": 2}, {"sum_logits": -1.1695489883422852, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.1695489883422852, "logits_per_char": -0.5847744941711426, "num_chars": 2}, {"sum_logits": -2.133769989013672, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -2.133769989013672, "logits_per_char": -1.066884994506836, "num_chars": 2}, {"sum_logits": -1.6496312618255615, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.6496312618255615, "logits_per_char": -0.8248156309127808, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 360, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.644873023033142, "incorrect_loss_raw": 1.3755824565887451, "correct_loss_per_char": 0.822436511516571, "incorrect_loss_per_char": 0.6877912282943726, "correct_loss_per_token": 1.644873023033142, "incorrect_loss_per_token": 1.3755824565887451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1015335321426392, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.1015335321426392, "logits_per_char": -0.5507667660713196, "num_chars": 2}, {"sum_logits": -1.179558277130127, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.179558277130127, "logits_per_char": -0.5897791385650635, "num_chars": 2}, {"sum_logits": -1.8456555604934692, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8456555604934692, "logits_per_char": -0.9228277802467346, "num_chars": 2}, {"sum_logits": -1.644873023033142, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.644873023033142, "logits_per_char": -0.822436511516571, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 361, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.801682949066162, "incorrect_loss_raw": 1.3918212254842122, "correct_loss_per_char": 0.900841474533081, "incorrect_loss_per_char": 0.6959106127421061, "correct_loss_per_token": 1.801682949066162, "incorrect_loss_per_token": 1.3918212254842122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0221316814422607, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -1.0221316814422607, "logits_per_char": -0.5110658407211304, "num_chars": 2}, {"sum_logits": -1.0776050090789795, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.0776050090789795, "logits_per_char": -0.5388025045394897, "num_chars": 2}, {"sum_logits": -2.0757269859313965, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -2.0757269859313965, "logits_per_char": -1.0378634929656982, "num_chars": 2}, {"sum_logits": -1.801682949066162, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.801682949066162, "logits_per_char": -0.900841474533081, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 362, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1555485725402832, "incorrect_loss_raw": 1.5253764788309734, "correct_loss_per_char": 0.5777742862701416, "incorrect_loss_per_char": 0.7626882394154867, "correct_loss_per_token": 1.1555485725402832, "incorrect_loss_per_token": 1.5253764788309734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1555485725402832, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.1555485725402832, "logits_per_char": -0.5777742862701416, "num_chars": 2}, {"sum_logits": -1.1918338537216187, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.1918338537216187, "logits_per_char": -0.5959169268608093, "num_chars": 2}, {"sum_logits": -1.8287043571472168, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.8287043571472168, "logits_per_char": -0.9143521785736084, "num_chars": 2}, {"sum_logits": -1.5555912256240845, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5555912256240845, "logits_per_char": -0.7777956128120422, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 363, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2922831773757935, "incorrect_loss_raw": 1.4666069348653157, "correct_loss_per_char": 0.6461415886878967, "incorrect_loss_per_char": 0.7333034674326578, "correct_loss_per_token": 1.2922831773757935, "incorrect_loss_per_token": 1.4666069348653157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1294481754302979, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.1294481754302979, "logits_per_char": -0.5647240877151489, "num_chars": 2}, {"sum_logits": -1.2922831773757935, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.2922831773757935, "logits_per_char": -0.6461415886878967, "num_chars": 2}, {"sum_logits": -1.7836220264434814, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.7836220264434814, "logits_per_char": -0.8918110132217407, "num_chars": 2}, {"sum_logits": -1.486750602722168, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.486750602722168, "logits_per_char": -0.743375301361084, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 364, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1188158988952637, "incorrect_loss_raw": 1.5591309865315754, "correct_loss_per_char": 0.5594079494476318, "incorrect_loss_per_char": 0.7795654932657877, "correct_loss_per_token": 1.1188158988952637, "incorrect_loss_per_token": 1.5591309865315754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1188158988952637, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": true, "logits_per_token": -1.1188158988952637, "logits_per_char": -0.5594079494476318, "num_chars": 2}, {"sum_logits": -1.1589393615722656, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.1589393615722656, "logits_per_char": -0.5794696807861328, "num_chars": 2}, {"sum_logits": -1.9570766687393188, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.9570766687393188, "logits_per_char": -0.9785383343696594, "num_chars": 2}, {"sum_logits": -1.561376929283142, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.561376929283142, "logits_per_char": -0.780688464641571, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 365, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1347944736480713, "incorrect_loss_raw": 1.529301921526591, "correct_loss_per_char": 0.5673972368240356, "incorrect_loss_per_char": 0.7646509607632955, "correct_loss_per_token": 1.1347944736480713, "incorrect_loss_per_token": 1.529301921526591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1347944736480713, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.1347944736480713, "logits_per_char": -0.5673972368240356, "num_chars": 2}, {"sum_logits": -1.236986756324768, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.236986756324768, "logits_per_char": -0.618493378162384, "num_chars": 2}, {"sum_logits": -1.8438619375228882, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.8438619375228882, "logits_per_char": -0.9219309687614441, "num_chars": 2}, {"sum_logits": -1.5070570707321167, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5070570707321167, "logits_per_char": -0.7535285353660583, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 366, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4846076965332031, "incorrect_loss_raw": 1.4185067613919575, "correct_loss_per_char": 0.7423038482666016, "incorrect_loss_per_char": 0.7092533806959788, "correct_loss_per_token": 1.4846076965332031, "incorrect_loss_per_token": 1.4185067613919575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9886564612388611, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.9886564612388611, "logits_per_char": -0.49432823061943054, "num_chars": 2}, {"sum_logits": -1.5063345432281494, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.5063345432281494, "logits_per_char": -0.7531672716140747, "num_chars": 2}, {"sum_logits": -1.7605292797088623, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.7605292797088623, "logits_per_char": -0.8802646398544312, "num_chars": 2}, {"sum_logits": -1.4846076965332031, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.4846076965332031, "logits_per_char": -0.7423038482666016, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 367, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5766677856445312, "incorrect_loss_raw": 1.3695402145385742, "correct_loss_per_char": 0.7883338928222656, "incorrect_loss_per_char": 0.6847701072692871, "correct_loss_per_token": 1.5766677856445312, "incorrect_loss_per_token": 1.3695402145385742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1646955013275146, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.1646955013275146, "logits_per_char": -0.5823477506637573, "num_chars": 2}, {"sum_logits": -1.2382843494415283, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.2382843494415283, "logits_per_char": -0.6191421747207642, "num_chars": 2}, {"sum_logits": -1.7056407928466797, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.7056407928466797, "logits_per_char": -0.8528203964233398, "num_chars": 2}, {"sum_logits": -1.5766677856445312, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.5766677856445312, "logits_per_char": -0.7883338928222656, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 368, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6955077648162842, "incorrect_loss_raw": 1.361947496732076, "correct_loss_per_char": 0.8477538824081421, "incorrect_loss_per_char": 0.680973748366038, "correct_loss_per_token": 1.6955077648162842, "incorrect_loss_per_token": 1.361947496732076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0662118196487427, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.0662118196487427, "logits_per_char": -0.5331059098243713, "num_chars": 2}, {"sum_logits": -1.2034177780151367, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.2034177780151367, "logits_per_char": -0.6017088890075684, "num_chars": 2}, {"sum_logits": -1.8162128925323486, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.8162128925323486, "logits_per_char": -0.9081064462661743, "num_chars": 2}, {"sum_logits": -1.6955077648162842, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.6955077648162842, "logits_per_char": -0.8477538824081421, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 369, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454391598701477, "incorrect_loss_raw": 1.405482331911723, "correct_loss_per_char": 0.7271957993507385, "incorrect_loss_per_char": 0.7027411659558614, "correct_loss_per_token": 1.454391598701477, "incorrect_loss_per_token": 1.405482331911723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1038691997528076, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.1038691997528076, "logits_per_char": -0.5519345998764038, "num_chars": 2}, {"sum_logits": -1.3807659149169922, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3807659149169922, "logits_per_char": -0.6903829574584961, "num_chars": 2}, {"sum_logits": -1.7318118810653687, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.7318118810653687, "logits_per_char": -0.8659059405326843, "num_chars": 2}, {"sum_logits": -1.454391598701477, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.454391598701477, "logits_per_char": -0.7271957993507385, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 370, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0503199100494385, "incorrect_loss_raw": 1.5986491044362385, "correct_loss_per_char": 0.5251599550247192, "incorrect_loss_per_char": 0.7993245522181193, "correct_loss_per_token": 1.0503199100494385, "incorrect_loss_per_token": 1.5986491044362385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0503199100494385, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0503199100494385, "logits_per_char": -0.5251599550247192, "num_chars": 2}, {"sum_logits": -1.1604294776916504, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1604294776916504, "logits_per_char": -0.5802147388458252, "num_chars": 2}, {"sum_logits": -1.9525710344314575, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.9525710344314575, "logits_per_char": -0.9762855172157288, "num_chars": 2}, {"sum_logits": -1.682946801185608, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.682946801185608, "logits_per_char": -0.841473400592804, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 371, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5151982307434082, "incorrect_loss_raw": 1.3680485884348552, "correct_loss_per_char": 0.7575991153717041, "incorrect_loss_per_char": 0.6840242942174276, "correct_loss_per_token": 1.5151982307434082, "incorrect_loss_per_token": 1.3680485884348552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2033050060272217, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -1.2033050060272217, "logits_per_char": -0.6016525030136108, "num_chars": 2}, {"sum_logits": -1.3903928995132446, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.3903928995132446, "logits_per_char": -0.6951964497566223, "num_chars": 2}, {"sum_logits": -1.5151982307434082, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.5151982307434082, "logits_per_char": -0.7575991153717041, "num_chars": 2}, {"sum_logits": -1.5104478597640991, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.5104478597640991, "logits_per_char": -0.7552239298820496, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 372, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9580461978912354, "incorrect_loss_raw": 1.379229446252187, "correct_loss_per_char": 0.9790230989456177, "incorrect_loss_per_char": 0.6896147231260935, "correct_loss_per_token": 1.9580461978912354, "incorrect_loss_per_token": 1.379229446252187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8381554484367371, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.8381554484367371, "logits_per_char": -0.41907772421836853, "num_chars": 2}, {"sum_logits": -1.2496662139892578, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.2496662139892578, "logits_per_char": -0.6248331069946289, "num_chars": 2}, {"sum_logits": -2.0498666763305664, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.0498666763305664, "logits_per_char": -1.0249333381652832, "num_chars": 2}, {"sum_logits": -1.9580461978912354, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.9580461978912354, "logits_per_char": -0.9790230989456177, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 373, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.854912519454956, "incorrect_loss_raw": 1.286536971728007, "correct_loss_per_char": 0.927456259727478, "incorrect_loss_per_char": 0.6432684858640035, "correct_loss_per_token": 1.854912519454956, "incorrect_loss_per_token": 1.286536971728007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1648471355438232, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.1648471355438232, "logits_per_char": -0.5824235677719116, "num_chars": 2}, {"sum_logits": -1.2645065784454346, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.2645065784454346, "logits_per_char": -0.6322532892227173, "num_chars": 2}, {"sum_logits": -1.854912519454956, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.854912519454956, "logits_per_char": -0.927456259727478, "num_chars": 2}, {"sum_logits": -1.4302572011947632, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.4302572011947632, "logits_per_char": -0.7151286005973816, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 374, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.073911428451538, "incorrect_loss_raw": 1.624431888262431, "correct_loss_per_char": 0.536955714225769, "incorrect_loss_per_char": 0.8122159441312155, "correct_loss_per_token": 1.073911428451538, "incorrect_loss_per_token": 1.624431888262431, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0448768138885498, "num_tokens": 1, "num_tokens_all": 1157, "is_greedy": true, "logits_per_token": -1.0448768138885498, "logits_per_char": -0.5224384069442749, "num_chars": 2}, {"sum_logits": -1.073911428451538, "num_tokens": 1, "num_tokens_all": 1157, "is_greedy": false, "logits_per_token": -1.073911428451538, "logits_per_char": -0.536955714225769, "num_chars": 2}, {"sum_logits": -2.0053482055664062, "num_tokens": 1, "num_tokens_all": 1157, "is_greedy": false, "logits_per_token": -2.0053482055664062, "logits_per_char": -1.0026741027832031, "num_chars": 2}, {"sum_logits": -1.8230706453323364, "num_tokens": 1, "num_tokens_all": 1157, "is_greedy": false, "logits_per_token": -1.8230706453323364, "logits_per_char": -0.9115353226661682, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 375, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7240808010101318, "incorrect_loss_raw": 1.3228757778803508, "correct_loss_per_char": 0.8620404005050659, "incorrect_loss_per_char": 0.6614378889401754, "correct_loss_per_token": 1.7240808010101318, "incorrect_loss_per_token": 1.3228757778803508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1099356412887573, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -1.1099356412887573, "logits_per_char": -0.5549678206443787, "num_chars": 2}, {"sum_logits": -1.2770352363586426, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.2770352363586426, "logits_per_char": -0.6385176181793213, "num_chars": 2}, {"sum_logits": -1.7240808010101318, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.7240808010101318, "logits_per_char": -0.8620404005050659, "num_chars": 2}, {"sum_logits": -1.5816564559936523, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.5816564559936523, "logits_per_char": -0.7908282279968262, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 376, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7161195278167725, "incorrect_loss_raw": 1.405906895796458, "correct_loss_per_char": 0.8580597639083862, "incorrect_loss_per_char": 0.702953447898229, "correct_loss_per_token": 1.7161195278167725, "incorrect_loss_per_token": 1.405906895796458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9415120482444763, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -0.9415120482444763, "logits_per_char": -0.47075602412223816, "num_chars": 2}, {"sum_logits": -1.2313387393951416, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.2313387393951416, "logits_per_char": -0.6156693696975708, "num_chars": 2}, {"sum_logits": -2.044869899749756, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -2.044869899749756, "logits_per_char": -1.022434949874878, "num_chars": 2}, {"sum_logits": -1.7161195278167725, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.7161195278167725, "logits_per_char": -0.8580597639083862, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 377, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6541903018951416, "incorrect_loss_raw": 1.381739576657613, "correct_loss_per_char": 0.8270951509475708, "incorrect_loss_per_char": 0.6908697883288065, "correct_loss_per_token": 1.6541903018951416, "incorrect_loss_per_token": 1.381739576657613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0640769004821777, "num_tokens": 1, "num_tokens_all": 1141, "is_greedy": true, "logits_per_token": -1.0640769004821777, "logits_per_char": -0.5320384502410889, "num_chars": 2}, {"sum_logits": -1.1875885725021362, "num_tokens": 1, "num_tokens_all": 1141, "is_greedy": false, "logits_per_token": -1.1875885725021362, "logits_per_char": -0.5937942862510681, "num_chars": 2}, {"sum_logits": -1.8935532569885254, "num_tokens": 1, "num_tokens_all": 1141, "is_greedy": false, "logits_per_token": -1.8935532569885254, "logits_per_char": -0.9467766284942627, "num_chars": 2}, {"sum_logits": -1.6541903018951416, "num_tokens": 1, "num_tokens_all": 1141, "is_greedy": false, "logits_per_token": -1.6541903018951416, "logits_per_char": -0.8270951509475708, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 378, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3986095190048218, "incorrect_loss_raw": 1.4107760985692341, "correct_loss_per_char": 0.6993047595024109, "incorrect_loss_per_char": 0.7053880492846171, "correct_loss_per_token": 1.3986095190048218, "incorrect_loss_per_token": 1.4107760985692341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2069379091262817, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -1.2069379091262817, "logits_per_char": -0.6034689545631409, "num_chars": 2}, {"sum_logits": -1.3561713695526123, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.3561713695526123, "logits_per_char": -0.6780856847763062, "num_chars": 2}, {"sum_logits": -1.6692190170288086, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.6692190170288086, "logits_per_char": -0.8346095085144043, "num_chars": 2}, {"sum_logits": -1.3986095190048218, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.3986095190048218, "logits_per_char": -0.6993047595024109, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 379, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.982304573059082, "incorrect_loss_raw": 1.6119366089502971, "correct_loss_per_char": 0.491152286529541, "incorrect_loss_per_char": 0.8059683044751486, "correct_loss_per_token": 0.982304573059082, "incorrect_loss_per_token": 1.6119366089502971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.982304573059082, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.982304573059082, "logits_per_char": -0.491152286529541, "num_chars": 2}, {"sum_logits": -1.2866909503936768, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.2866909503936768, "logits_per_char": -0.6433454751968384, "num_chars": 2}, {"sum_logits": -1.8419674634933472, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.8419674634933472, "logits_per_char": -0.9209837317466736, "num_chars": 2}, {"sum_logits": -1.7071514129638672, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.7071514129638672, "logits_per_char": -0.8535757064819336, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 380, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.962990641593933, "incorrect_loss_raw": 1.291998823483785, "correct_loss_per_char": 0.9814953207969666, "incorrect_loss_per_char": 0.6459994117418925, "correct_loss_per_token": 1.962990641593933, "incorrect_loss_per_token": 1.291998823483785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0358818769454956, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": true, "logits_per_token": -1.0358818769454956, "logits_per_char": -0.5179409384727478, "num_chars": 2}, {"sum_logits": -1.1949920654296875, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.1949920654296875, "logits_per_char": -0.5974960327148438, "num_chars": 2}, {"sum_logits": -1.962990641593933, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.962990641593933, "logits_per_char": -0.9814953207969666, "num_chars": 2}, {"sum_logits": -1.6451225280761719, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.6451225280761719, "logits_per_char": -0.8225612640380859, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 381, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.028862714767456, "incorrect_loss_raw": 1.5994359652201335, "correct_loss_per_char": 0.514431357383728, "incorrect_loss_per_char": 0.7997179826100668, "correct_loss_per_token": 1.028862714767456, "incorrect_loss_per_token": 1.5994359652201335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.028862714767456, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.028862714767456, "logits_per_char": -0.514431357383728, "num_chars": 2}, {"sum_logits": -1.2043572664260864, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2043572664260864, "logits_per_char": -0.6021786332130432, "num_chars": 2}, {"sum_logits": -1.8914673328399658, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.8914673328399658, "logits_per_char": -0.9457336664199829, "num_chars": 2}, {"sum_logits": -1.7024832963943481, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.7024832963943481, "logits_per_char": -0.8512416481971741, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 382, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1289533376693726, "incorrect_loss_raw": 1.5504164298375447, "correct_loss_per_char": 0.5644766688346863, "incorrect_loss_per_char": 0.7752082149187723, "correct_loss_per_token": 1.1289533376693726, "incorrect_loss_per_token": 1.5504164298375447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1573759317398071, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.1573759317398071, "logits_per_char": -0.5786879658699036, "num_chars": 2}, {"sum_logits": -1.1289533376693726, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.1289533376693726, "logits_per_char": -0.5644766688346863, "num_chars": 2}, {"sum_logits": -1.9237762689590454, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.9237762689590454, "logits_per_char": -0.9618881344795227, "num_chars": 2}, {"sum_logits": -1.5700970888137817, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.5700970888137817, "logits_per_char": -0.7850485444068909, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 383, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7845591306686401, "incorrect_loss_raw": 1.2981013059616089, "correct_loss_per_char": 0.8922795653343201, "incorrect_loss_per_char": 0.6490506529808044, "correct_loss_per_token": 1.7845591306686401, "incorrect_loss_per_token": 1.2981013059616089, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3083491325378418, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.3083491325378418, "logits_per_char": -0.6541745662689209, "num_chars": 2}, {"sum_logits": -1.1563072204589844, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -1.1563072204589844, "logits_per_char": -0.5781536102294922, "num_chars": 2}, {"sum_logits": -1.7845591306686401, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.7845591306686401, "logits_per_char": -0.8922795653343201, "num_chars": 2}, {"sum_logits": -1.4296475648880005, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.4296475648880005, "logits_per_char": -0.7148237824440002, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 384, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1475013494491577, "incorrect_loss_raw": 1.5350499550501506, "correct_loss_per_char": 0.5737506747245789, "incorrect_loss_per_char": 0.7675249775250753, "correct_loss_per_token": 1.1475013494491577, "incorrect_loss_per_token": 1.5350499550501506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2024537324905396, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.2024537324905396, "logits_per_char": -0.6012268662452698, "num_chars": 2}, {"sum_logits": -1.1475013494491577, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1475013494491577, "logits_per_char": -0.5737506747245789, "num_chars": 2}, {"sum_logits": -1.8997259140014648, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.8997259140014648, "logits_per_char": -0.9498629570007324, "num_chars": 2}, {"sum_logits": -1.5029702186584473, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.5029702186584473, "logits_per_char": -0.7514851093292236, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 385, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.251303791999817, "incorrect_loss_raw": 1.4849827686945598, "correct_loss_per_char": 0.6256518959999084, "incorrect_loss_per_char": 0.7424913843472799, "correct_loss_per_token": 1.251303791999817, "incorrect_loss_per_token": 1.4849827686945598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.251303791999817, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.251303791999817, "logits_per_char": -0.6256518959999084, "num_chars": 2}, {"sum_logits": -1.1082370281219482, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.1082370281219482, "logits_per_char": -0.5541185140609741, "num_chars": 2}, {"sum_logits": -1.744293212890625, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.744293212890625, "logits_per_char": -0.8721466064453125, "num_chars": 2}, {"sum_logits": -1.602418065071106, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.602418065071106, "logits_per_char": -0.801209032535553, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 386, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6064808368682861, "incorrect_loss_raw": 1.3335253794987996, "correct_loss_per_char": 0.8032404184341431, "incorrect_loss_per_char": 0.6667626897493998, "correct_loss_per_token": 1.6064808368682861, "incorrect_loss_per_token": 1.3335253794987996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2881888151168823, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -1.2881888151168823, "logits_per_char": -0.6440944075584412, "num_chars": 2}, {"sum_logits": -1.2920440435409546, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.2920440435409546, "logits_per_char": -0.6460220217704773, "num_chars": 2}, {"sum_logits": -1.6064808368682861, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.6064808368682861, "logits_per_char": -0.8032404184341431, "num_chars": 2}, {"sum_logits": -1.420343279838562, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.420343279838562, "logits_per_char": -0.710171639919281, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 387, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2974212169647217, "incorrect_loss_raw": 1.4854857126871746, "correct_loss_per_char": 0.6487106084823608, "incorrect_loss_per_char": 0.7427428563435873, "correct_loss_per_token": 1.2974212169647217, "incorrect_loss_per_token": 1.4854857126871746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.03870689868927, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.03870689868927, "logits_per_char": -0.519353449344635, "num_chars": 2}, {"sum_logits": -1.2974212169647217, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2974212169647217, "logits_per_char": -0.6487106084823608, "num_chars": 2}, {"sum_logits": -1.7735601663589478, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.7735601663589478, "logits_per_char": -0.8867800831794739, "num_chars": 2}, {"sum_logits": -1.6441900730133057, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.6441900730133057, "logits_per_char": -0.8220950365066528, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 388, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0982366800308228, "incorrect_loss_raw": 1.5466023683547974, "correct_loss_per_char": 0.5491183400154114, "incorrect_loss_per_char": 0.7733011841773987, "correct_loss_per_token": 1.0982366800308228, "incorrect_loss_per_token": 1.5466023683547974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0982366800308228, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -1.0982366800308228, "logits_per_char": -0.5491183400154114, "num_chars": 2}, {"sum_logits": -1.2570797204971313, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.2570797204971313, "logits_per_char": -0.6285398602485657, "num_chars": 2}, {"sum_logits": -1.8488881587982178, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.8488881587982178, "logits_per_char": -0.9244440793991089, "num_chars": 2}, {"sum_logits": -1.533839225769043, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.533839225769043, "logits_per_char": -0.7669196128845215, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 389, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1597964763641357, "incorrect_loss_raw": 1.5379918813705444, "correct_loss_per_char": 0.5798982381820679, "incorrect_loss_per_char": 0.7689959406852722, "correct_loss_per_token": 1.1597964763641357, "incorrect_loss_per_token": 1.5379918813705444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1597964763641357, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.1597964763641357, "logits_per_char": -0.5798982381820679, "num_chars": 2}, {"sum_logits": -1.1262731552124023, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": true, "logits_per_token": -1.1262731552124023, "logits_per_char": -0.5631365776062012, "num_chars": 2}, {"sum_logits": -1.8963159322738647, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.8963159322738647, "logits_per_char": -0.9481579661369324, "num_chars": 2}, {"sum_logits": -1.5913865566253662, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.5913865566253662, "logits_per_char": -0.7956932783126831, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 390, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0459529161453247, "incorrect_loss_raw": 1.551868478457133, "correct_loss_per_char": 0.5229764580726624, "incorrect_loss_per_char": 0.7759342392285665, "correct_loss_per_token": 1.0459529161453247, "incorrect_loss_per_token": 1.551868478457133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0459529161453247, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -1.0459529161453247, "logits_per_char": -0.5229764580726624, "num_chars": 2}, {"sum_logits": -1.4096543788909912, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.4096543788909912, "logits_per_char": -0.7048271894454956, "num_chars": 2}, {"sum_logits": -1.7395710945129395, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.7395710945129395, "logits_per_char": -0.8697855472564697, "num_chars": 2}, {"sum_logits": -1.5063799619674683, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.5063799619674683, "logits_per_char": -0.7531899809837341, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 391, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8448954820632935, "incorrect_loss_raw": 1.3050119876861572, "correct_loss_per_char": 0.9224477410316467, "incorrect_loss_per_char": 0.6525059938430786, "correct_loss_per_token": 1.8448954820632935, "incorrect_loss_per_token": 1.3050119876861572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1486988067626953, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.1486988067626953, "logits_per_char": -0.5743494033813477, "num_chars": 2}, {"sum_logits": -1.1373852491378784, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -1.1373852491378784, "logits_per_char": -0.5686926245689392, "num_chars": 2}, {"sum_logits": -1.8448954820632935, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.8448954820632935, "logits_per_char": -0.9224477410316467, "num_chars": 2}, {"sum_logits": -1.628951907157898, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.628951907157898, "logits_per_char": -0.814475953578949, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 392, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2255842685699463, "incorrect_loss_raw": 1.5179332494735718, "correct_loss_per_char": 0.6127921342849731, "incorrect_loss_per_char": 0.7589666247367859, "correct_loss_per_token": 1.2255842685699463, "incorrect_loss_per_token": 1.5179332494735718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.056219458580017, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.056219458580017, "logits_per_char": -0.5281097292900085, "num_chars": 2}, {"sum_logits": -1.2255842685699463, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2255842685699463, "logits_per_char": -0.6127921342849731, "num_chars": 2}, {"sum_logits": -1.8560543060302734, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.8560543060302734, "logits_per_char": -0.9280271530151367, "num_chars": 2}, {"sum_logits": -1.6415259838104248, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.6415259838104248, "logits_per_char": -0.8207629919052124, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 393, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1932368278503418, "incorrect_loss_raw": 1.4848539431889851, "correct_loss_per_char": 0.5966184139251709, "incorrect_loss_per_char": 0.7424269715944926, "correct_loss_per_token": 1.1932368278503418, "incorrect_loss_per_token": 1.4848539431889851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2512623071670532, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.2512623071670532, "logits_per_char": -0.6256311535835266, "num_chars": 2}, {"sum_logits": -1.1932368278503418, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -1.1932368278503418, "logits_per_char": -0.5966184139251709, "num_chars": 2}, {"sum_logits": -1.658415675163269, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.658415675163269, "logits_per_char": -0.8292078375816345, "num_chars": 2}, {"sum_logits": -1.5448838472366333, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.5448838472366333, "logits_per_char": -0.7724419236183167, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 394, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8265290260314941, "incorrect_loss_raw": 1.3965312043825786, "correct_loss_per_char": 0.9132645130157471, "incorrect_loss_per_char": 0.6982656021912893, "correct_loss_per_token": 1.8265290260314941, "incorrect_loss_per_token": 1.3965312043825786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9193615317344666, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -0.9193615317344666, "logits_per_char": -0.4596807658672333, "num_chars": 2}, {"sum_logits": -1.1846843957901, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.1846843957901, "logits_per_char": -0.59234219789505, "num_chars": 2}, {"sum_logits": -2.085547685623169, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -2.085547685623169, "logits_per_char": -1.0427738428115845, "num_chars": 2}, {"sum_logits": -1.8265290260314941, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.8265290260314941, "logits_per_char": -0.9132645130157471, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 395, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1083039045333862, "incorrect_loss_raw": 1.5769434372584026, "correct_loss_per_char": 0.5541519522666931, "incorrect_loss_per_char": 0.7884717186292013, "correct_loss_per_token": 1.1083039045333862, "incorrect_loss_per_token": 1.5769434372584026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1218453645706177, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.1218453645706177, "logits_per_char": -0.5609226822853088, "num_chars": 2}, {"sum_logits": -1.1083039045333862, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1083039045333862, "logits_per_char": -0.5541519522666931, "num_chars": 2}, {"sum_logits": -2.0300087928771973, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -2.0300087928771973, "logits_per_char": -1.0150043964385986, "num_chars": 2}, {"sum_logits": -1.5789761543273926, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.5789761543273926, "logits_per_char": -0.7894880771636963, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 396, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8922686576843262, "incorrect_loss_raw": 1.3134442965189617, "correct_loss_per_char": 0.9461343288421631, "incorrect_loss_per_char": 0.6567221482594808, "correct_loss_per_token": 1.8922686576843262, "incorrect_loss_per_token": 1.3134442965189617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0851821899414062, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0851821899414062, "logits_per_char": -0.5425910949707031, "num_chars": 2}, {"sum_logits": -1.1293675899505615, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.1293675899505615, "logits_per_char": -0.5646837949752808, "num_chars": 2}, {"sum_logits": -1.8922686576843262, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.8922686576843262, "logits_per_char": -0.9461343288421631, "num_chars": 2}, {"sum_logits": -1.725783109664917, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.725783109664917, "logits_per_char": -0.8628915548324585, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 397, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.234533429145813, "incorrect_loss_raw": 1.4968740542729695, "correct_loss_per_char": 0.6172667145729065, "incorrect_loss_per_char": 0.7484370271364847, "correct_loss_per_token": 1.234533429145813, "incorrect_loss_per_token": 1.4968740542729695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.117770791053772, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.117770791053772, "logits_per_char": -0.558885395526886, "num_chars": 2}, {"sum_logits": -1.234533429145813, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.234533429145813, "logits_per_char": -0.6172667145729065, "num_chars": 2}, {"sum_logits": -1.8130990266799927, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.8130990266799927, "logits_per_char": -0.9065495133399963, "num_chars": 2}, {"sum_logits": -1.559752345085144, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.559752345085144, "logits_per_char": -0.779876172542572, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 398, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2589569091796875, "incorrect_loss_raw": 1.5053149064381917, "correct_loss_per_char": 0.6294784545898438, "incorrect_loss_per_char": 0.7526574532190958, "correct_loss_per_token": 1.2589569091796875, "incorrect_loss_per_token": 1.5053149064381917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0528075695037842, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.0528075695037842, "logits_per_char": -0.5264037847518921, "num_chars": 2}, {"sum_logits": -1.2589569091796875, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.2589569091796875, "logits_per_char": -0.6294784545898438, "num_chars": 2}, {"sum_logits": -1.90510892868042, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.90510892868042, "logits_per_char": -0.95255446434021, "num_chars": 2}, {"sum_logits": -1.558028221130371, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.558028221130371, "logits_per_char": -0.7790141105651855, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 399, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0686852931976318, "incorrect_loss_raw": 1.5813748836517334, "correct_loss_per_char": 0.5343426465988159, "incorrect_loss_per_char": 0.7906874418258667, "correct_loss_per_token": 1.0686852931976318, "incorrect_loss_per_token": 1.5813748836517334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0686852931976318, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0686852931976318, "logits_per_char": -0.5343426465988159, "num_chars": 2}, {"sum_logits": -1.1637773513793945, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.1637773513793945, "logits_per_char": -0.5818886756896973, "num_chars": 2}, {"sum_logits": -1.887481689453125, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.887481689453125, "logits_per_char": -0.9437408447265625, "num_chars": 2}, {"sum_logits": -1.6928656101226807, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6928656101226807, "logits_per_char": -0.8464328050613403, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 400, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1323201656341553, "incorrect_loss_raw": 1.5739607016245525, "correct_loss_per_char": 0.5661600828170776, "incorrect_loss_per_char": 0.7869803508122762, "correct_loss_per_token": 1.1323201656341553, "incorrect_loss_per_token": 1.5739607016245525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.102924108505249, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.102924108505249, "logits_per_char": -0.5514620542526245, "num_chars": 2}, {"sum_logits": -1.1323201656341553, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1323201656341553, "logits_per_char": -0.5661600828170776, "num_chars": 2}, {"sum_logits": -2.041790008544922, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -2.041790008544922, "logits_per_char": -1.020895004272461, "num_chars": 2}, {"sum_logits": -1.5771679878234863, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.5771679878234863, "logits_per_char": -0.7885839939117432, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 401, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2275972366333008, "incorrect_loss_raw": 1.5150266885757446, "correct_loss_per_char": 0.6137986183166504, "incorrect_loss_per_char": 0.7575133442878723, "correct_loss_per_token": 1.2275972366333008, "incorrect_loss_per_token": 1.5150266885757446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0798723697662354, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": true, "logits_per_token": -1.0798723697662354, "logits_per_char": -0.5399361848831177, "num_chars": 2}, {"sum_logits": -1.2275972366333008, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.2275972366333008, "logits_per_char": -0.6137986183166504, "num_chars": 2}, {"sum_logits": -1.8952170610427856, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.8952170610427856, "logits_per_char": -0.9476085305213928, "num_chars": 2}, {"sum_logits": -1.569990634918213, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.569990634918213, "logits_per_char": -0.7849953174591064, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 402, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.669379472732544, "incorrect_loss_raw": 1.410802920659383, "correct_loss_per_char": 0.834689736366272, "incorrect_loss_per_char": 0.7054014603296915, "correct_loss_per_token": 1.669379472732544, "incorrect_loss_per_token": 1.410802920659383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0136206150054932, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0136206150054932, "logits_per_char": -0.5068103075027466, "num_chars": 2}, {"sum_logits": -1.1671791076660156, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1671791076660156, "logits_per_char": -0.5835895538330078, "num_chars": 2}, {"sum_logits": -2.0516090393066406, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -2.0516090393066406, "logits_per_char": -1.0258045196533203, "num_chars": 2}, {"sum_logits": -1.669379472732544, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.669379472732544, "logits_per_char": -0.834689736366272, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 403, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1549887657165527, "incorrect_loss_raw": 1.2701730529467266, "correct_loss_per_char": 1.0774943828582764, "incorrect_loss_per_char": 0.6350865264733633, "correct_loss_per_token": 2.1549887657165527, "incorrect_loss_per_token": 1.2701730529467266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9571223855018616, "num_tokens": 1, "num_tokens_all": 1122, "is_greedy": true, "logits_per_token": -0.9571223855018616, "logits_per_char": -0.4785611927509308, "num_chars": 2}, {"sum_logits": -1.2252922058105469, "num_tokens": 1, "num_tokens_all": 1122, "is_greedy": false, "logits_per_token": -1.2252922058105469, "logits_per_char": -0.6126461029052734, "num_chars": 2}, {"sum_logits": -2.1549887657165527, "num_tokens": 1, "num_tokens_all": 1122, "is_greedy": false, "logits_per_token": -2.1549887657165527, "logits_per_char": -1.0774943828582764, "num_chars": 2}, {"sum_logits": -1.628104567527771, "num_tokens": 1, "num_tokens_all": 1122, "is_greedy": false, "logits_per_token": -1.628104567527771, "logits_per_char": -0.8140522837638855, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 404, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4388420581817627, "incorrect_loss_raw": 1.4351174036661785, "correct_loss_per_char": 0.7194210290908813, "incorrect_loss_per_char": 0.7175587018330892, "correct_loss_per_token": 1.4388420581817627, "incorrect_loss_per_token": 1.4351174036661785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9889717102050781, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.9889717102050781, "logits_per_char": -0.49448585510253906, "num_chars": 2}, {"sum_logits": -1.4388420581817627, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.4388420581817627, "logits_per_char": -0.7194210290908813, "num_chars": 2}, {"sum_logits": -1.7785601615905762, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.7785601615905762, "logits_per_char": -0.8892800807952881, "num_chars": 2}, {"sum_logits": -1.5378203392028809, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.5378203392028809, "logits_per_char": -0.7689101696014404, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 405, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6750876903533936, "incorrect_loss_raw": 1.387303392092387, "correct_loss_per_char": 0.8375438451766968, "incorrect_loss_per_char": 0.6936516960461935, "correct_loss_per_token": 1.6750876903533936, "incorrect_loss_per_token": 1.387303392092387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0195232629776, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.0195232629776, "logits_per_char": -0.5097616314888, "num_chars": 2}, {"sum_logits": -1.2131023406982422, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.2131023406982422, "logits_per_char": -0.6065511703491211, "num_chars": 2}, {"sum_logits": -1.9292845726013184, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.9292845726013184, "logits_per_char": -0.9646422863006592, "num_chars": 2}, {"sum_logits": -1.6750876903533936, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.6750876903533936, "logits_per_char": -0.8375438451766968, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 406, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2346546649932861, "incorrect_loss_raw": 1.5660948157310486, "correct_loss_per_char": 0.6173273324966431, "incorrect_loss_per_char": 0.7830474078655243, "correct_loss_per_token": 1.2346546649932861, "incorrect_loss_per_token": 1.5660948157310486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9384849667549133, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -0.9384849667549133, "logits_per_char": -0.46924248337745667, "num_chars": 2}, {"sum_logits": -1.2346546649932861, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.2346546649932861, "logits_per_char": -0.6173273324966431, "num_chars": 2}, {"sum_logits": -2.041966438293457, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -2.041966438293457, "logits_per_char": -1.0209832191467285, "num_chars": 2}, {"sum_logits": -1.7178330421447754, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.7178330421447754, "logits_per_char": -0.8589165210723877, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 407, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5890166759490967, "incorrect_loss_raw": 1.3790148099263508, "correct_loss_per_char": 0.7945083379745483, "incorrect_loss_per_char": 0.6895074049631754, "correct_loss_per_token": 1.5890166759490967, "incorrect_loss_per_token": 1.3790148099263508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1433452367782593, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.1433452367782593, "logits_per_char": -0.5716726183891296, "num_chars": 2}, {"sum_logits": -1.1849042177200317, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.1849042177200317, "logits_per_char": -0.5924521088600159, "num_chars": 2}, {"sum_logits": -1.8087949752807617, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.8087949752807617, "logits_per_char": -0.9043974876403809, "num_chars": 2}, {"sum_logits": -1.5890166759490967, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5890166759490967, "logits_per_char": -0.7945083379745483, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 408, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3728793859481812, "incorrect_loss_raw": 1.4275180101394653, "correct_loss_per_char": 0.6864396929740906, "incorrect_loss_per_char": 0.7137590050697327, "correct_loss_per_token": 1.3728793859481812, "incorrect_loss_per_token": 1.4275180101394653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1650488376617432, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.1650488376617432, "logits_per_char": -0.5825244188308716, "num_chars": 2}, {"sum_logits": -1.3728793859481812, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.3728793859481812, "logits_per_char": -0.6864396929740906, "num_chars": 2}, {"sum_logits": -1.7428758144378662, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.7428758144378662, "logits_per_char": -0.8714379072189331, "num_chars": 2}, {"sum_logits": -1.3746293783187866, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.3746293783187866, "logits_per_char": -0.6873146891593933, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 409, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6242549419403076, "incorrect_loss_raw": 1.3455153306325276, "correct_loss_per_char": 0.8121274709701538, "incorrect_loss_per_char": 0.6727576653162638, "correct_loss_per_token": 1.6242549419403076, "incorrect_loss_per_token": 1.3455153306325276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1611988544464111, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -1.1611988544464111, "logits_per_char": -0.5805994272232056, "num_chars": 2}, {"sum_logits": -1.2641704082489014, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.2641704082489014, "logits_per_char": -0.6320852041244507, "num_chars": 2}, {"sum_logits": -1.6242549419403076, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.6242549419403076, "logits_per_char": -0.8121274709701538, "num_chars": 2}, {"sum_logits": -1.6111767292022705, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.6111767292022705, "logits_per_char": -0.8055883646011353, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 410, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2800018787384033, "incorrect_loss_raw": 1.5052996476491292, "correct_loss_per_char": 0.6400009393692017, "incorrect_loss_per_char": 0.7526498238245646, "correct_loss_per_token": 1.2800018787384033, "incorrect_loss_per_token": 1.5052996476491292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0139989852905273, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.0139989852905273, "logits_per_char": -0.5069994926452637, "num_chars": 2}, {"sum_logits": -1.2800018787384033, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.2800018787384033, "logits_per_char": -0.6400009393692017, "num_chars": 2}, {"sum_logits": -1.8351844549179077, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.8351844549179077, "logits_per_char": -0.9175922274589539, "num_chars": 2}, {"sum_logits": -1.6667155027389526, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.6667155027389526, "logits_per_char": -0.8333577513694763, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 411, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0921392440795898, "incorrect_loss_raw": 1.558257023493449, "correct_loss_per_char": 0.5460696220397949, "incorrect_loss_per_char": 0.7791285117467245, "correct_loss_per_token": 1.0921392440795898, "incorrect_loss_per_token": 1.558257023493449, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0921392440795898, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.0921392440795898, "logits_per_char": -0.5460696220397949, "num_chars": 2}, {"sum_logits": -1.2227237224578857, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2227237224578857, "logits_per_char": -0.6113618612289429, "num_chars": 2}, {"sum_logits": -1.896178960800171, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.896178960800171, "logits_per_char": -0.9480894804000854, "num_chars": 2}, {"sum_logits": -1.55586838722229, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.55586838722229, "logits_per_char": -0.777934193611145, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 412, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.074731469154358, "incorrect_loss_raw": 1.539361834526062, "correct_loss_per_char": 0.537365734577179, "incorrect_loss_per_char": 0.769680917263031, "correct_loss_per_token": 1.074731469154358, "incorrect_loss_per_token": 1.539361834526062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.074731469154358, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -1.074731469154358, "logits_per_char": -0.537365734577179, "num_chars": 2}, {"sum_logits": -1.4442158937454224, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.4442158937454224, "logits_per_char": -0.7221079468727112, "num_chars": 2}, {"sum_logits": -1.7595064640045166, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.7595064640045166, "logits_per_char": -0.8797532320022583, "num_chars": 2}, {"sum_logits": -1.414363145828247, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.414363145828247, "logits_per_char": -0.7071815729141235, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 413, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7658106088638306, "incorrect_loss_raw": 1.3049053351084392, "correct_loss_per_char": 0.8829053044319153, "incorrect_loss_per_char": 0.6524526675542196, "correct_loss_per_token": 1.7658106088638306, "incorrect_loss_per_token": 1.3049053351084392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1868517398834229, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.1868517398834229, "logits_per_char": -0.5934258699417114, "num_chars": 2}, {"sum_logits": -1.259539246559143, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.259539246559143, "logits_per_char": -0.6297696232795715, "num_chars": 2}, {"sum_logits": -1.7658106088638306, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.7658106088638306, "logits_per_char": -0.8829053044319153, "num_chars": 2}, {"sum_logits": -1.4683250188827515, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.4683250188827515, "logits_per_char": -0.7341625094413757, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 414, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0068402290344238, "incorrect_loss_raw": 1.6385625998179119, "correct_loss_per_char": 0.5034201145172119, "incorrect_loss_per_char": 0.8192812999089559, "correct_loss_per_token": 1.0068402290344238, "incorrect_loss_per_token": 1.6385625998179119, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0068402290344238, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.0068402290344238, "logits_per_char": -0.5034201145172119, "num_chars": 2}, {"sum_logits": -1.1409847736358643, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.1409847736358643, "logits_per_char": -0.5704923868179321, "num_chars": 2}, {"sum_logits": -2.0239927768707275, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -2.0239927768707275, "logits_per_char": -1.0119963884353638, "num_chars": 2}, {"sum_logits": -1.7507102489471436, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.7507102489471436, "logits_per_char": -0.8753551244735718, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 415, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9980530738830566, "incorrect_loss_raw": 1.5800513823827107, "correct_loss_per_char": 0.4990265369415283, "incorrect_loss_per_char": 0.7900256911913554, "correct_loss_per_token": 0.9980530738830566, "incorrect_loss_per_token": 1.5800513823827107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9980530738830566, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.9980530738830566, "logits_per_char": -0.4990265369415283, "num_chars": 2}, {"sum_logits": -1.4350602626800537, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.4350602626800537, "logits_per_char": -0.7175301313400269, "num_chars": 2}, {"sum_logits": -1.7676903009414673, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.7676903009414673, "logits_per_char": -0.8838451504707336, "num_chars": 2}, {"sum_logits": -1.5374035835266113, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.5374035835266113, "logits_per_char": -0.7687017917633057, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 416, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0260424613952637, "incorrect_loss_raw": 1.5983776648839314, "correct_loss_per_char": 0.5130212306976318, "incorrect_loss_per_char": 0.7991888324419657, "correct_loss_per_token": 1.0260424613952637, "incorrect_loss_per_token": 1.5983776648839314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0260424613952637, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.0260424613952637, "logits_per_char": -0.5130212306976318, "num_chars": 2}, {"sum_logits": -1.2366589307785034, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.2366589307785034, "logits_per_char": -0.6183294653892517, "num_chars": 2}, {"sum_logits": -1.9219242334365845, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.9219242334365845, "logits_per_char": -0.9609621167182922, "num_chars": 2}, {"sum_logits": -1.6365498304367065, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.6365498304367065, "logits_per_char": -0.8182749152183533, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 417, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6032371520996094, "incorrect_loss_raw": 1.4310887257258098, "correct_loss_per_char": 0.8016185760498047, "incorrect_loss_per_char": 0.7155443628629049, "correct_loss_per_token": 1.6032371520996094, "incorrect_loss_per_token": 1.4310887257258098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.016481876373291, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.016481876373291, "logits_per_char": -0.5082409381866455, "num_chars": 2}, {"sum_logits": -1.188583254814148, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.188583254814148, "logits_per_char": -0.594291627407074, "num_chars": 2}, {"sum_logits": -2.0882010459899902, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -2.0882010459899902, "logits_per_char": -1.0441005229949951, "num_chars": 2}, {"sum_logits": -1.6032371520996094, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.6032371520996094, "logits_per_char": -0.8016185760498047, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 418, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.585882544517517, "incorrect_loss_raw": 1.3979870080947876, "correct_loss_per_char": 0.7929412722587585, "incorrect_loss_per_char": 0.6989935040473938, "correct_loss_per_token": 1.585882544517517, "incorrect_loss_per_token": 1.3979870080947876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1025753021240234, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.1025753021240234, "logits_per_char": -0.5512876510620117, "num_chars": 2}, {"sum_logits": -1.171287178993225, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.171287178993225, "logits_per_char": -0.5856435894966125, "num_chars": 2}, {"sum_logits": -1.9200985431671143, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.9200985431671143, "logits_per_char": -0.9600492715835571, "num_chars": 2}, {"sum_logits": -1.585882544517517, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.585882544517517, "logits_per_char": -0.7929412722587585, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 419, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6381429433822632, "incorrect_loss_raw": 1.3643684387207031, "correct_loss_per_char": 0.8190714716911316, "incorrect_loss_per_char": 0.6821842193603516, "correct_loss_per_token": 1.6381429433822632, "incorrect_loss_per_token": 1.3643684387207031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0768616199493408, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -1.0768616199493408, "logits_per_char": -0.5384308099746704, "num_chars": 2}, {"sum_logits": -1.2683210372924805, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.2683210372924805, "logits_per_char": -0.6341605186462402, "num_chars": 2}, {"sum_logits": -1.747922658920288, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.747922658920288, "logits_per_char": -0.873961329460144, "num_chars": 2}, {"sum_logits": -1.6381429433822632, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.6381429433822632, "logits_per_char": -0.8190714716911316, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 420, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9468621015548706, "incorrect_loss_raw": 1.6684067646662395, "correct_loss_per_char": 0.4734310507774353, "incorrect_loss_per_char": 0.8342033823331197, "correct_loss_per_token": 0.9468621015548706, "incorrect_loss_per_token": 1.6684067646662395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9468621015548706, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.9468621015548706, "logits_per_char": -0.4734310507774353, "num_chars": 2}, {"sum_logits": -1.1974960565567017, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1974960565567017, "logits_per_char": -0.5987480282783508, "num_chars": 2}, {"sum_logits": -2.0480875968933105, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -2.0480875968933105, "logits_per_char": -1.0240437984466553, "num_chars": 2}, {"sum_logits": -1.759636640548706, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.759636640548706, "logits_per_char": -0.879818320274353, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 421, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1805689334869385, "incorrect_loss_raw": 1.4996313254038494, "correct_loss_per_char": 0.5902844667434692, "incorrect_loss_per_char": 0.7498156627019247, "correct_loss_per_token": 1.1805689334869385, "incorrect_loss_per_token": 1.4996313254038494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1805689334869385, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.1805689334869385, "logits_per_char": -0.5902844667434692, "num_chars": 2}, {"sum_logits": -1.2283333539962769, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2283333539962769, "logits_per_char": -0.6141666769981384, "num_chars": 2}, {"sum_logits": -1.725996732711792, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.725996732711792, "logits_per_char": -0.862998366355896, "num_chars": 2}, {"sum_logits": -1.544563889503479, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.544563889503479, "logits_per_char": -0.7722819447517395, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 422, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1271965503692627, "incorrect_loss_raw": 1.5243621667226155, "correct_loss_per_char": 0.5635982751846313, "incorrect_loss_per_char": 0.7621810833613077, "correct_loss_per_token": 1.1271965503692627, "incorrect_loss_per_token": 1.5243621667226155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1271965503692627, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1271965503692627, "logits_per_char": -0.5635982751846313, "num_chars": 2}, {"sum_logits": -1.2439210414886475, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2439210414886475, "logits_per_char": -0.6219605207443237, "num_chars": 2}, {"sum_logits": -1.7710236310958862, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.7710236310958862, "logits_per_char": -0.8855118155479431, "num_chars": 2}, {"sum_logits": -1.558141827583313, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.558141827583313, "logits_per_char": -0.7790709137916565, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 423, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1323274374008179, "incorrect_loss_raw": 1.5363119045893352, "correct_loss_per_char": 0.5661637187004089, "incorrect_loss_per_char": 0.7681559522946676, "correct_loss_per_token": 1.1323274374008179, "incorrect_loss_per_token": 1.5363119045893352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1934627294540405, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.1934627294540405, "logits_per_char": -0.5967313647270203, "num_chars": 2}, {"sum_logits": -1.1323274374008179, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.1323274374008179, "logits_per_char": -0.5661637187004089, "num_chars": 2}, {"sum_logits": -1.8732255697250366, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.8732255697250366, "logits_per_char": -0.9366127848625183, "num_chars": 2}, {"sum_logits": -1.5422474145889282, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5422474145889282, "logits_per_char": -0.7711237072944641, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 424, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9445905685424805, "incorrect_loss_raw": 1.2681372960408528, "correct_loss_per_char": 0.9722952842712402, "incorrect_loss_per_char": 0.6340686480204264, "correct_loss_per_token": 1.9445905685424805, "incorrect_loss_per_token": 1.2681372960408528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.196834921836853, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.196834921836853, "logits_per_char": -0.5984174609184265, "num_chars": 2}, {"sum_logits": -1.1692979335784912, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.1692979335784912, "logits_per_char": -0.5846489667892456, "num_chars": 2}, {"sum_logits": -1.9445905685424805, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.9445905685424805, "logits_per_char": -0.9722952842712402, "num_chars": 2}, {"sum_logits": -1.4382790327072144, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.4382790327072144, "logits_per_char": -0.7191395163536072, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 425, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.32576322555542, "incorrect_loss_raw": 1.4332718054453533, "correct_loss_per_char": 0.66288161277771, "incorrect_loss_per_char": 0.7166359027226766, "correct_loss_per_token": 1.32576322555542, "incorrect_loss_per_token": 1.4332718054453533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1912533044815063, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -1.1912533044815063, "logits_per_char": -0.5956266522407532, "num_chars": 2}, {"sum_logits": -1.32576322555542, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.32576322555542, "logits_per_char": -0.66288161277771, "num_chars": 2}, {"sum_logits": -1.605324625968933, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.605324625968933, "logits_per_char": -0.8026623129844666, "num_chars": 2}, {"sum_logits": -1.5032374858856201, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.5032374858856201, "logits_per_char": -0.7516187429428101, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 426, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0103092193603516, "incorrect_loss_raw": 1.2688262065251668, "correct_loss_per_char": 1.0051546096801758, "incorrect_loss_per_char": 0.6344131032625834, "correct_loss_per_token": 2.0103092193603516, "incorrect_loss_per_token": 1.2688262065251668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1030980348587036, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.1030980348587036, "logits_per_char": -0.5515490174293518, "num_chars": 2}, {"sum_logits": -1.1553411483764648, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1553411483764648, "logits_per_char": -0.5776705741882324, "num_chars": 2}, {"sum_logits": -2.0103092193603516, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -2.0103092193603516, "logits_per_char": -1.0051546096801758, "num_chars": 2}, {"sum_logits": -1.548039436340332, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.548039436340332, "logits_per_char": -0.774019718170166, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 427, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9255499839782715, "incorrect_loss_raw": 1.2990176677703857, "correct_loss_per_char": 0.9627749919891357, "incorrect_loss_per_char": 0.6495088338851929, "correct_loss_per_token": 1.9255499839782715, "incorrect_loss_per_token": 1.2990176677703857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0294830799102783, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.0294830799102783, "logits_per_char": -0.5147415399551392, "num_chars": 2}, {"sum_logits": -1.2154157161712646, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.2154157161712646, "logits_per_char": -0.6077078580856323, "num_chars": 2}, {"sum_logits": -1.9255499839782715, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.9255499839782715, "logits_per_char": -0.9627749919891357, "num_chars": 2}, {"sum_logits": -1.6521542072296143, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.6521542072296143, "logits_per_char": -0.8260771036148071, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 428, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6685893535614014, "incorrect_loss_raw": 1.3670422236124675, "correct_loss_per_char": 0.8342946767807007, "incorrect_loss_per_char": 0.6835211118062338, "correct_loss_per_token": 1.6685893535614014, "incorrect_loss_per_token": 1.3670422236124675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0937738418579102, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.0937738418579102, "logits_per_char": -0.5468869209289551, "num_chars": 2}, {"sum_logits": -1.18110191822052, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.18110191822052, "logits_per_char": -0.59055095911026, "num_chars": 2}, {"sum_logits": -1.8262509107589722, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.8262509107589722, "logits_per_char": -0.9131254553794861, "num_chars": 2}, {"sum_logits": -1.6685893535614014, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.6685893535614014, "logits_per_char": -0.8342946767807007, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 429, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2335569858551025, "incorrect_loss_raw": 1.6457362174987793, "correct_loss_per_char": 0.6167784929275513, "incorrect_loss_per_char": 0.8228681087493896, "correct_loss_per_token": 1.2335569858551025, "incorrect_loss_per_token": 1.6457362174987793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8218070268630981, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": true, "logits_per_token": -0.8218070268630981, "logits_per_char": -0.4109035134315491, "num_chars": 2}, {"sum_logits": -1.2335569858551025, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.2335569858551025, "logits_per_char": -0.6167784929275513, "num_chars": 2}, {"sum_logits": -2.2412073612213135, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -2.2412073612213135, "logits_per_char": -1.1206036806106567, "num_chars": 2}, {"sum_logits": -1.8741942644119263, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.8741942644119263, "logits_per_char": -0.9370971322059631, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 430, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.014209508895874, "incorrect_loss_raw": 1.5923082033793132, "correct_loss_per_char": 0.507104754447937, "incorrect_loss_per_char": 0.7961541016896566, "correct_loss_per_token": 1.014209508895874, "incorrect_loss_per_token": 1.5923082033793132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.014209508895874, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.014209508895874, "logits_per_char": -0.507104754447937, "num_chars": 2}, {"sum_logits": -1.275148868560791, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.275148868560791, "logits_per_char": -0.6375744342803955, "num_chars": 2}, {"sum_logits": -1.745626449584961, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.745626449584961, "logits_per_char": -0.8728132247924805, "num_chars": 2}, {"sum_logits": -1.7561492919921875, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.7561492919921875, "logits_per_char": -0.8780746459960938, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 431, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.099551200866699, "incorrect_loss_raw": 1.3892759084701538, "correct_loss_per_char": 1.0497756004333496, "incorrect_loss_per_char": 0.6946379542350769, "correct_loss_per_token": 2.099551200866699, "incorrect_loss_per_token": 1.3892759084701538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8307503461837769, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": true, "logits_per_token": -0.8307503461837769, "logits_per_char": -0.4153751730918884, "num_chars": 2}, {"sum_logits": -1.1496927738189697, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.1496927738189697, "logits_per_char": -0.5748463869094849, "num_chars": 2}, {"sum_logits": -2.187384605407715, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -2.187384605407715, "logits_per_char": -1.0936923027038574, "num_chars": 2}, {"sum_logits": -2.099551200866699, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -2.099551200866699, "logits_per_char": -1.0497756004333496, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 432, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2035183906555176, "incorrect_loss_raw": 1.490944226582845, "correct_loss_per_char": 0.6017591953277588, "incorrect_loss_per_char": 0.7454721132914225, "correct_loss_per_token": 1.2035183906555176, "incorrect_loss_per_token": 1.490944226582845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2035183906555176, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -1.2035183906555176, "logits_per_char": -0.6017591953277588, "num_chars": 2}, {"sum_logits": -1.2118586301803589, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.2118586301803589, "logits_per_char": -0.6059293150901794, "num_chars": 2}, {"sum_logits": -1.7304490804672241, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.7304490804672241, "logits_per_char": -0.8652245402336121, "num_chars": 2}, {"sum_logits": -1.5305249691009521, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.5305249691009521, "logits_per_char": -0.7652624845504761, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 433, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0594220161437988, "incorrect_loss_raw": 1.5967005491256714, "correct_loss_per_char": 0.5297110080718994, "incorrect_loss_per_char": 0.7983502745628357, "correct_loss_per_token": 1.0594220161437988, "incorrect_loss_per_token": 1.5967005491256714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0594220161437988, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0594220161437988, "logits_per_char": -0.5297110080718994, "num_chars": 2}, {"sum_logits": -1.1590735912322998, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1590735912322998, "logits_per_char": -0.5795367956161499, "num_chars": 2}, {"sum_logits": -2.006554365158081, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -2.006554365158081, "logits_per_char": -1.0032771825790405, "num_chars": 2}, {"sum_logits": -1.6244736909866333, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.6244736909866333, "logits_per_char": -0.8122368454933167, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 434, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.021909236907959, "incorrect_loss_raw": 1.3009478251139324, "correct_loss_per_char": 1.0109546184539795, "incorrect_loss_per_char": 0.6504739125569662, "correct_loss_per_token": 2.021909236907959, "incorrect_loss_per_token": 1.3009478251139324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0288606882095337, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": true, "logits_per_token": -1.0288606882095337, "logits_per_char": -0.5144303441047668, "num_chars": 2}, {"sum_logits": -1.107550859451294, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -1.107550859451294, "logits_per_char": -0.553775429725647, "num_chars": 2}, {"sum_logits": -2.021909236907959, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -2.021909236907959, "logits_per_char": -1.0109546184539795, "num_chars": 2}, {"sum_logits": -1.7664319276809692, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -1.7664319276809692, "logits_per_char": -0.8832159638404846, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 435, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3161630630493164, "incorrect_loss_raw": 1.4445078770319622, "correct_loss_per_char": 0.6580815315246582, "incorrect_loss_per_char": 0.7222539385159811, "correct_loss_per_token": 1.3161630630493164, "incorrect_loss_per_token": 1.4445078770319622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1505696773529053, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.1505696773529053, "logits_per_char": -0.5752848386764526, "num_chars": 2}, {"sum_logits": -1.3161630630493164, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.3161630630493164, "logits_per_char": -0.6580815315246582, "num_chars": 2}, {"sum_logits": -1.647897481918335, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.647897481918335, "logits_per_char": -0.8239487409591675, "num_chars": 2}, {"sum_logits": -1.535056471824646, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.535056471824646, "logits_per_char": -0.767528235912323, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 436, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1130670309066772, "incorrect_loss_raw": 1.5205084482828777, "correct_loss_per_char": 0.5565335154533386, "incorrect_loss_per_char": 0.7602542241414388, "correct_loss_per_token": 1.1130670309066772, "incorrect_loss_per_token": 1.5205084482828777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1130670309066772, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.1130670309066772, "logits_per_char": -0.5565335154533386, "num_chars": 2}, {"sum_logits": -1.4432486295700073, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4432486295700073, "logits_per_char": -0.7216243147850037, "num_chars": 2}, {"sum_logits": -1.743078351020813, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.743078351020813, "logits_per_char": -0.8715391755104065, "num_chars": 2}, {"sum_logits": -1.3751983642578125, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.3751983642578125, "logits_per_char": -0.6875991821289062, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 437, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0107311010360718, "incorrect_loss_raw": 1.5928718249003093, "correct_loss_per_char": 0.5053655505180359, "incorrect_loss_per_char": 0.7964359124501547, "correct_loss_per_token": 1.0107311010360718, "incorrect_loss_per_token": 1.5928718249003093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0107311010360718, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.0107311010360718, "logits_per_char": -0.5053655505180359, "num_chars": 2}, {"sum_logits": -1.3553318977355957, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3553318977355957, "logits_per_char": -0.6776659488677979, "num_chars": 2}, {"sum_logits": -1.9497549533843994, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.9497549533843994, "logits_per_char": -0.9748774766921997, "num_chars": 2}, {"sum_logits": -1.4735286235809326, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4735286235809326, "logits_per_char": -0.7367643117904663, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 438, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.508835792541504, "incorrect_loss_raw": 1.4168473879496257, "correct_loss_per_char": 0.754417896270752, "incorrect_loss_per_char": 0.7084236939748129, "correct_loss_per_token": 1.508835792541504, "incorrect_loss_per_token": 1.4168473879496257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0100675821304321, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.0100675821304321, "logits_per_char": -0.5050337910652161, "num_chars": 2}, {"sum_logits": -1.3788785934448242, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3788785934448242, "logits_per_char": -0.6894392967224121, "num_chars": 2}, {"sum_logits": -1.8615959882736206, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.8615959882736206, "logits_per_char": -0.9307979941368103, "num_chars": 2}, {"sum_logits": -1.508835792541504, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.508835792541504, "logits_per_char": -0.754417896270752, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 439, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6662671566009521, "incorrect_loss_raw": 1.4218467871348064, "correct_loss_per_char": 0.8331335783004761, "incorrect_loss_per_char": 0.7109233935674032, "correct_loss_per_token": 1.6662671566009521, "incorrect_loss_per_token": 1.4218467871348064, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0053987503051758, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": true, "logits_per_token": -1.0053987503051758, "logits_per_char": -0.5026993751525879, "num_chars": 2}, {"sum_logits": -1.1794826984405518, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.1794826984405518, "logits_per_char": -0.5897413492202759, "num_chars": 2}, {"sum_logits": -2.0806589126586914, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -2.0806589126586914, "logits_per_char": -1.0403294563293457, "num_chars": 2}, {"sum_logits": -1.6662671566009521, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.6662671566009521, "logits_per_char": -0.8331335783004761, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 440, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0347065925598145, "incorrect_loss_raw": 1.2955053647359211, "correct_loss_per_char": 1.0173532962799072, "incorrect_loss_per_char": 0.6477526823679606, "correct_loss_per_token": 2.0347065925598145, "incorrect_loss_per_token": 1.2955053647359211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0275315046310425, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.0275315046310425, "logits_per_char": -0.5137657523155212, "num_chars": 2}, {"sum_logits": -1.1366313695907593, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1366313695907593, "logits_per_char": -0.5683156847953796, "num_chars": 2}, {"sum_logits": -2.0347065925598145, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -2.0347065925598145, "logits_per_char": -1.0173532962799072, "num_chars": 2}, {"sum_logits": -1.722353219985962, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.722353219985962, "logits_per_char": -0.861176609992981, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 441, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6127301454544067, "incorrect_loss_raw": 1.3399004538853962, "correct_loss_per_char": 0.8063650727272034, "incorrect_loss_per_char": 0.6699502269426981, "correct_loss_per_token": 1.6127301454544067, "incorrect_loss_per_token": 1.3399004538853962, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2775439023971558, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.2775439023971558, "logits_per_char": -0.6387719511985779, "num_chars": 2}, {"sum_logits": -1.2184054851531982, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -1.2184054851531982, "logits_per_char": -0.6092027425765991, "num_chars": 2}, {"sum_logits": -1.6127301454544067, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6127301454544067, "logits_per_char": -0.8063650727272034, "num_chars": 2}, {"sum_logits": -1.523751974105835, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.523751974105835, "logits_per_char": -0.7618759870529175, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 442, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9432213306427002, "incorrect_loss_raw": 1.3448190887769063, "correct_loss_per_char": 0.9716106653213501, "incorrect_loss_per_char": 0.6724095443884531, "correct_loss_per_token": 1.9432213306427002, "incorrect_loss_per_token": 1.3448190887769063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8995082974433899, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -0.8995082974433899, "logits_per_char": -0.44975414872169495, "num_chars": 2}, {"sum_logits": -1.2562756538391113, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.2562756538391113, "logits_per_char": -0.6281378269195557, "num_chars": 2}, {"sum_logits": -1.9432213306427002, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.9432213306427002, "logits_per_char": -0.9716106653213501, "num_chars": 2}, {"sum_logits": -1.8786733150482178, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.8786733150482178, "logits_per_char": -0.9393366575241089, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 443, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0388554334640503, "incorrect_loss_raw": 1.5848006010055542, "correct_loss_per_char": 0.5194277167320251, "incorrect_loss_per_char": 0.7924003005027771, "correct_loss_per_token": 1.0388554334640503, "incorrect_loss_per_token": 1.5848006010055542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0388554334640503, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.0388554334640503, "logits_per_char": -0.5194277167320251, "num_chars": 2}, {"sum_logits": -1.2139437198638916, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2139437198638916, "logits_per_char": -0.6069718599319458, "num_chars": 2}, {"sum_logits": -1.8782132863998413, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.8782132863998413, "logits_per_char": -0.9391066431999207, "num_chars": 2}, {"sum_logits": -1.6622447967529297, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.6622447967529297, "logits_per_char": -0.8311223983764648, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 444, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9117329120635986, "incorrect_loss_raw": 1.3000866969426472, "correct_loss_per_char": 0.9558664560317993, "incorrect_loss_per_char": 0.6500433484713236, "correct_loss_per_token": 1.9117329120635986, "incorrect_loss_per_token": 1.3000866969426472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0407531261444092, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -1.0407531261444092, "logits_per_char": -0.5203765630722046, "num_chars": 2}, {"sum_logits": -1.2080289125442505, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.2080289125442505, "logits_per_char": -0.6040144562721252, "num_chars": 2}, {"sum_logits": -1.9117329120635986, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.9117329120635986, "logits_per_char": -0.9558664560317993, "num_chars": 2}, {"sum_logits": -1.6514780521392822, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.6514780521392822, "logits_per_char": -0.8257390260696411, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 445, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1371780633926392, "incorrect_loss_raw": 1.5290921926498413, "correct_loss_per_char": 0.5685890316963196, "incorrect_loss_per_char": 0.7645460963249207, "correct_loss_per_token": 1.1371780633926392, "incorrect_loss_per_token": 1.5290921926498413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1371780633926392, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.1371780633926392, "logits_per_char": -0.5685890316963196, "num_chars": 2}, {"sum_logits": -1.1738884449005127, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.1738884449005127, "logits_per_char": -0.5869442224502563, "num_chars": 2}, {"sum_logits": -1.7821247577667236, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.7821247577667236, "logits_per_char": -0.8910623788833618, "num_chars": 2}, {"sum_logits": -1.6312633752822876, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.6312633752822876, "logits_per_char": -0.8156316876411438, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 446, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.084289789199829, "incorrect_loss_raw": 1.5266461769739788, "correct_loss_per_char": 0.5421448945999146, "incorrect_loss_per_char": 0.7633230884869894, "correct_loss_per_token": 1.084289789199829, "incorrect_loss_per_token": 1.5266461769739788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.084289789199829, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -1.084289789199829, "logits_per_char": -0.5421448945999146, "num_chars": 2}, {"sum_logits": -1.44123113155365, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.44123113155365, "logits_per_char": -0.720615565776825, "num_chars": 2}, {"sum_logits": -1.6644861698150635, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6644861698150635, "logits_per_char": -0.8322430849075317, "num_chars": 2}, {"sum_logits": -1.4742212295532227, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.4742212295532227, "logits_per_char": -0.7371106147766113, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 447, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.75664222240448, "incorrect_loss_raw": 1.3108864227930705, "correct_loss_per_char": 0.87832111120224, "incorrect_loss_per_char": 0.6554432113965353, "correct_loss_per_token": 1.75664222240448, "incorrect_loss_per_token": 1.3108864227930705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.083035945892334, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -1.083035945892334, "logits_per_char": -0.541517972946167, "num_chars": 2}, {"sum_logits": -1.3978848457336426, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.3978848457336426, "logits_per_char": -0.6989424228668213, "num_chars": 2}, {"sum_logits": -1.75664222240448, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.75664222240448, "logits_per_char": -0.87832111120224, "num_chars": 2}, {"sum_logits": -1.4517384767532349, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.4517384767532349, "logits_per_char": -0.7258692383766174, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 448, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9389132261276245, "incorrect_loss_raw": 1.2759108940760295, "correct_loss_per_char": 0.9694566130638123, "incorrect_loss_per_char": 0.6379554470380148, "correct_loss_per_token": 1.9389132261276245, "incorrect_loss_per_token": 1.2759108940760295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1446411609649658, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.1446411609649658, "logits_per_char": -0.5723205804824829, "num_chars": 2}, {"sum_logits": -1.1746762990951538, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.1746762990951538, "logits_per_char": -0.5873381495475769, "num_chars": 2}, {"sum_logits": -1.9389132261276245, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.9389132261276245, "logits_per_char": -0.9694566130638123, "num_chars": 2}, {"sum_logits": -1.5084152221679688, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.5084152221679688, "logits_per_char": -0.7542076110839844, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 449, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2297492027282715, "incorrect_loss_raw": 1.5166502793629963, "correct_loss_per_char": 0.6148746013641357, "incorrect_loss_per_char": 0.7583251396814982, "correct_loss_per_token": 1.2297492027282715, "incorrect_loss_per_token": 1.5166502793629963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.054419755935669, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -1.054419755935669, "logits_per_char": -0.5272098779678345, "num_chars": 2}, {"sum_logits": -1.2297492027282715, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.2297492027282715, "logits_per_char": -0.6148746013641357, "num_chars": 2}, {"sum_logits": -1.8524580001831055, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.8524580001831055, "logits_per_char": -0.9262290000915527, "num_chars": 2}, {"sum_logits": -1.6430730819702148, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.6430730819702148, "logits_per_char": -0.8215365409851074, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 450, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2530004978179932, "incorrect_loss_raw": 1.4583396911621094, "correct_loss_per_char": 0.6265002489089966, "incorrect_loss_per_char": 0.7291698455810547, "correct_loss_per_token": 1.2530004978179932, "incorrect_loss_per_token": 1.4583396911621094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.281712293624878, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.281712293624878, "logits_per_char": -0.640856146812439, "num_chars": 2}, {"sum_logits": -1.2530004978179932, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": true, "logits_per_token": -1.2530004978179932, "logits_per_char": -0.6265002489089966, "num_chars": 2}, {"sum_logits": -1.6428369283676147, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.6428369283676147, "logits_per_char": -0.8214184641838074, "num_chars": 2}, {"sum_logits": -1.4504698514938354, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.4504698514938354, "logits_per_char": -0.7252349257469177, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 451, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7765909433364868, "incorrect_loss_raw": 1.3078529834747314, "correct_loss_per_char": 0.8882954716682434, "incorrect_loss_per_char": 0.6539264917373657, "correct_loss_per_token": 1.7765909433364868, "incorrect_loss_per_token": 1.3078529834747314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.093584418296814, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.093584418296814, "logits_per_char": -0.546792209148407, "num_chars": 2}, {"sum_logits": -1.5256550312042236, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.5256550312042236, "logits_per_char": -0.7628275156021118, "num_chars": 2}, {"sum_logits": -1.7765909433364868, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7765909433364868, "logits_per_char": -0.8882954716682434, "num_chars": 2}, {"sum_logits": -1.3043195009231567, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.3043195009231567, "logits_per_char": -0.6521597504615784, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 452, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.71627938747406, "incorrect_loss_raw": 1.3145328760147095, "correct_loss_per_char": 0.85813969373703, "incorrect_loss_per_char": 0.6572664380073547, "correct_loss_per_token": 1.71627938747406, "incorrect_loss_per_token": 1.3145328760147095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1825551986694336, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -1.1825551986694336, "logits_per_char": -0.5912775993347168, "num_chars": 2}, {"sum_logits": -1.2886545658111572, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.2886545658111572, "logits_per_char": -0.6443272829055786, "num_chars": 2}, {"sum_logits": -1.71627938747406, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.71627938747406, "logits_per_char": -0.85813969373703, "num_chars": 2}, {"sum_logits": -1.4723888635635376, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.4723888635635376, "logits_per_char": -0.7361944317817688, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 453, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6993898153305054, "incorrect_loss_raw": 1.3639968633651733, "correct_loss_per_char": 0.8496949076652527, "incorrect_loss_per_char": 0.6819984316825867, "correct_loss_per_token": 1.6993898153305054, "incorrect_loss_per_token": 1.3639968633651733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0399670600891113, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -1.0399670600891113, "logits_per_char": -0.5199835300445557, "num_chars": 2}, {"sum_logits": -1.2252905368804932, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.2252905368804932, "logits_per_char": -0.6126452684402466, "num_chars": 2}, {"sum_logits": -1.8267329931259155, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.8267329931259155, "logits_per_char": -0.9133664965629578, "num_chars": 2}, {"sum_logits": -1.6993898153305054, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.6993898153305054, "logits_per_char": -0.8496949076652527, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 454, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6059564352035522, "incorrect_loss_raw": 1.388539989789327, "correct_loss_per_char": 0.8029782176017761, "incorrect_loss_per_char": 0.6942699948946635, "correct_loss_per_token": 1.6059564352035522, "incorrect_loss_per_token": 1.388539989789327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1481913328170776, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.1481913328170776, "logits_per_char": -0.5740956664085388, "num_chars": 2}, {"sum_logits": -1.133172869682312, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.133172869682312, "logits_per_char": -0.566586434841156, "num_chars": 2}, {"sum_logits": -1.8842557668685913, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.8842557668685913, "logits_per_char": -0.9421278834342957, "num_chars": 2}, {"sum_logits": -1.6059564352035522, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.6059564352035522, "logits_per_char": -0.8029782176017761, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 455, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2880257368087769, "incorrect_loss_raw": 1.4862704277038574, "correct_loss_per_char": 0.6440128684043884, "incorrect_loss_per_char": 0.7431352138519287, "correct_loss_per_token": 1.2880257368087769, "incorrect_loss_per_token": 1.4862704277038574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0784575939178467, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.0784575939178467, "logits_per_char": -0.5392287969589233, "num_chars": 2}, {"sum_logits": -1.2880257368087769, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.2880257368087769, "logits_per_char": -0.6440128684043884, "num_chars": 2}, {"sum_logits": -1.8341902494430542, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.8341902494430542, "logits_per_char": -0.9170951247215271, "num_chars": 2}, {"sum_logits": -1.5461634397506714, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.5461634397506714, "logits_per_char": -0.7730817198753357, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 456, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2448368072509766, "incorrect_loss_raw": 1.4746033350626628, "correct_loss_per_char": 0.6224184036254883, "incorrect_loss_per_char": 0.7373016675313314, "correct_loss_per_token": 1.2448368072509766, "incorrect_loss_per_token": 1.4746033350626628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1889190673828125, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1889190673828125, "logits_per_char": -0.5944595336914062, "num_chars": 2}, {"sum_logits": -1.2448368072509766, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.2448368072509766, "logits_per_char": -0.6224184036254883, "num_chars": 2}, {"sum_logits": -1.6775264739990234, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.6775264739990234, "logits_per_char": -0.8387632369995117, "num_chars": 2}, {"sum_logits": -1.5573644638061523, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.5573644638061523, "logits_per_char": -0.7786822319030762, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 457, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0325119495391846, "incorrect_loss_raw": 1.5782335599263508, "correct_loss_per_char": 0.5162559747695923, "incorrect_loss_per_char": 0.7891167799631754, "correct_loss_per_token": 1.0325119495391846, "incorrect_loss_per_token": 1.5782335599263508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0325119495391846, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.0325119495391846, "logits_per_char": -0.5162559747695923, "num_chars": 2}, {"sum_logits": -1.291675090789795, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.291675090789795, "logits_per_char": -0.6458375453948975, "num_chars": 2}, {"sum_logits": -1.819879412651062, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.819879412651062, "logits_per_char": -0.909939706325531, "num_chars": 2}, {"sum_logits": -1.6231461763381958, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.6231461763381958, "logits_per_char": -0.8115730881690979, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 458, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1710233688354492, "incorrect_loss_raw": 1.581575910250346, "correct_loss_per_char": 0.5855116844177246, "incorrect_loss_per_char": 0.790787955125173, "correct_loss_per_token": 1.1710233688354492, "incorrect_loss_per_token": 1.581575910250346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9999991655349731, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.9999991655349731, "logits_per_char": -0.4999995827674866, "num_chars": 2}, {"sum_logits": -1.1710233688354492, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.1710233688354492, "logits_per_char": -0.5855116844177246, "num_chars": 2}, {"sum_logits": -2.058349132537842, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -2.058349132537842, "logits_per_char": -1.029174566268921, "num_chars": 2}, {"sum_logits": -1.6863794326782227, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6863794326782227, "logits_per_char": -0.8431897163391113, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 459, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.570352554321289, "incorrect_loss_raw": 1.4522466659545898, "correct_loss_per_char": 0.7851762771606445, "incorrect_loss_per_char": 0.7261233329772949, "correct_loss_per_token": 1.570352554321289, "incorrect_loss_per_token": 1.4522466659545898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9908794164657593, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -0.9908794164657593, "logits_per_char": -0.49543970823287964, "num_chars": 2}, {"sum_logits": -1.2178508043289185, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.2178508043289185, "logits_per_char": -0.6089254021644592, "num_chars": 2}, {"sum_logits": -2.148009777069092, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -2.148009777069092, "logits_per_char": -1.074004888534546, "num_chars": 2}, {"sum_logits": -1.570352554321289, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.570352554321289, "logits_per_char": -0.7851762771606445, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 460, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3556756973266602, "incorrect_loss_raw": 1.4593237241109211, "correct_loss_per_char": 0.6778378486633301, "incorrect_loss_per_char": 0.7296618620554606, "correct_loss_per_token": 1.3556756973266602, "incorrect_loss_per_token": 1.4593237241109211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0492944717407227, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -1.0492944717407227, "logits_per_char": -0.5246472358703613, "num_chars": 2}, {"sum_logits": -1.3556756973266602, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.3556756973266602, "logits_per_char": -0.6778378486633301, "num_chars": 2}, {"sum_logits": -1.8174036741256714, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.8174036741256714, "logits_per_char": -0.9087018370628357, "num_chars": 2}, {"sum_logits": -1.5112730264663696, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.5112730264663696, "logits_per_char": -0.7556365132331848, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 461, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1821914911270142, "incorrect_loss_raw": 1.5263262192408245, "correct_loss_per_char": 0.5910957455635071, "incorrect_loss_per_char": 0.7631631096204122, "correct_loss_per_token": 1.1821914911270142, "incorrect_loss_per_token": 1.5263262192408245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1044150590896606, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.1044150590896606, "logits_per_char": -0.5522075295448303, "num_chars": 2}, {"sum_logits": -1.1821914911270142, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.1821914911270142, "logits_per_char": -0.5910957455635071, "num_chars": 2}, {"sum_logits": -1.8733960390090942, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.8733960390090942, "logits_per_char": -0.9366980195045471, "num_chars": 2}, {"sum_logits": -1.6011675596237183, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.6011675596237183, "logits_per_char": -0.8005837798118591, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 462, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5506627559661865, "incorrect_loss_raw": 1.3724888960520427, "correct_loss_per_char": 0.7753313779830933, "incorrect_loss_per_char": 0.6862444480260214, "correct_loss_per_token": 1.5506627559661865, "incorrect_loss_per_token": 1.3724888960520427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1600661277770996, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -1.1600661277770996, "logits_per_char": -0.5800330638885498, "num_chars": 2}, {"sum_logits": -1.2923736572265625, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.2923736572265625, "logits_per_char": -0.6461868286132812, "num_chars": 2}, {"sum_logits": -1.6650269031524658, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6650269031524658, "logits_per_char": -0.8325134515762329, "num_chars": 2}, {"sum_logits": -1.5506627559661865, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.5506627559661865, "logits_per_char": -0.7753313779830933, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 463, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1214121580123901, "incorrect_loss_raw": 1.5389111042022705, "correct_loss_per_char": 0.5607060790061951, "incorrect_loss_per_char": 0.7694555521011353, "correct_loss_per_token": 1.1214121580123901, "incorrect_loss_per_token": 1.5389111042022705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2344647645950317, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.2344647645950317, "logits_per_char": -0.6172323822975159, "num_chars": 2}, {"sum_logits": -1.1214121580123901, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.1214121580123901, "logits_per_char": -0.5607060790061951, "num_chars": 2}, {"sum_logits": -1.859907865524292, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.859907865524292, "logits_per_char": -0.929953932762146, "num_chars": 2}, {"sum_logits": -1.5223606824874878, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.5223606824874878, "logits_per_char": -0.7611803412437439, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 464, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8951704502105713, "incorrect_loss_raw": 1.2799917062123616, "correct_loss_per_char": 0.9475852251052856, "incorrect_loss_per_char": 0.6399958531061808, "correct_loss_per_token": 1.8951704502105713, "incorrect_loss_per_token": 1.2799917062123616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1872754096984863, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.1872754096984863, "logits_per_char": -0.5936377048492432, "num_chars": 2}, {"sum_logits": -1.1747561693191528, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -1.1747561693191528, "logits_per_char": -0.5873780846595764, "num_chars": 2}, {"sum_logits": -1.8951704502105713, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.8951704502105713, "logits_per_char": -0.9475852251052856, "num_chars": 2}, {"sum_logits": -1.4779435396194458, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.4779435396194458, "logits_per_char": -0.7389717698097229, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 465, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7572543621063232, "incorrect_loss_raw": 1.3151508569717407, "correct_loss_per_char": 0.8786271810531616, "incorrect_loss_per_char": 0.6575754284858704, "correct_loss_per_token": 1.7572543621063232, "incorrect_loss_per_token": 1.3151508569717407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.132042646408081, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -1.132042646408081, "logits_per_char": -0.5660213232040405, "num_chars": 2}, {"sum_logits": -1.2347546815872192, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.2347546815872192, "logits_per_char": -0.6173773407936096, "num_chars": 2}, {"sum_logits": -1.7572543621063232, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.7572543621063232, "logits_per_char": -0.8786271810531616, "num_chars": 2}, {"sum_logits": -1.5786552429199219, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.5786552429199219, "logits_per_char": -0.7893276214599609, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 466, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0770784616470337, "incorrect_loss_raw": 1.5735320250193279, "correct_loss_per_char": 0.5385392308235168, "incorrect_loss_per_char": 0.7867660125096639, "correct_loss_per_token": 1.0770784616470337, "incorrect_loss_per_token": 1.5735320250193279, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0770784616470337, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.0770784616470337, "logits_per_char": -0.5385392308235168, "num_chars": 2}, {"sum_logits": -1.2309916019439697, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.2309916019439697, "logits_per_char": -0.6154958009719849, "num_chars": 2}, {"sum_logits": -1.978655457496643, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.978655457496643, "logits_per_char": -0.9893277287483215, "num_chars": 2}, {"sum_logits": -1.5109490156173706, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.5109490156173706, "logits_per_char": -0.7554745078086853, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 467, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0676312446594238, "incorrect_loss_raw": 1.589441140492757, "correct_loss_per_char": 0.5338156223297119, "incorrect_loss_per_char": 0.7947205702463785, "correct_loss_per_token": 1.0676312446594238, "incorrect_loss_per_token": 1.589441140492757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0676312446594238, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": true, "logits_per_token": -1.0676312446594238, "logits_per_char": -0.5338156223297119, "num_chars": 2}, {"sum_logits": -1.1884509325027466, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.1884509325027466, "logits_per_char": -0.5942254662513733, "num_chars": 2}, {"sum_logits": -1.992806077003479, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.992806077003479, "logits_per_char": -0.9964030385017395, "num_chars": 2}, {"sum_logits": -1.587066411972046, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.587066411972046, "logits_per_char": -0.793533205986023, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 468, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3160914182662964, "incorrect_loss_raw": 1.4954149723052979, "correct_loss_per_char": 0.6580457091331482, "incorrect_loss_per_char": 0.7477074861526489, "correct_loss_per_token": 1.3160914182662964, "incorrect_loss_per_token": 1.4954149723052979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9788870811462402, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -0.9788870811462402, "logits_per_char": -0.4894435405731201, "num_chars": 2}, {"sum_logits": -1.3160914182662964, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.3160914182662964, "logits_per_char": -0.6580457091331482, "num_chars": 2}, {"sum_logits": -1.8367112874984741, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.8367112874984741, "logits_per_char": -0.9183556437492371, "num_chars": 2}, {"sum_logits": -1.6706465482711792, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.6706465482711792, "logits_per_char": -0.8353232741355896, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 469, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2127530574798584, "incorrect_loss_raw": 1.5505858262379963, "correct_loss_per_char": 0.6063765287399292, "incorrect_loss_per_char": 0.7752929131189982, "correct_loss_per_token": 1.2127530574798584, "incorrect_loss_per_token": 1.5505858262379963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0134966373443604, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0134966373443604, "logits_per_char": -0.5067483186721802, "num_chars": 2}, {"sum_logits": -1.2127530574798584, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.2127530574798584, "logits_per_char": -0.6063765287399292, "num_chars": 2}, {"sum_logits": -2.021984577178955, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -2.021984577178955, "logits_per_char": -1.0109922885894775, "num_chars": 2}, {"sum_logits": -1.6162762641906738, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.6162762641906738, "logits_per_char": -0.8081381320953369, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 470, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5295274257659912, "incorrect_loss_raw": 1.3867807388305664, "correct_loss_per_char": 0.7647637128829956, "incorrect_loss_per_char": 0.6933903694152832, "correct_loss_per_token": 1.5295274257659912, "incorrect_loss_per_token": 1.3867807388305664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.052270770072937, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.052270770072937, "logits_per_char": -0.5261353850364685, "num_chars": 2}, {"sum_logits": -1.4138811826705933, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4138811826705933, "logits_per_char": -0.7069405913352966, "num_chars": 2}, {"sum_logits": -1.694190263748169, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.694190263748169, "logits_per_char": -0.8470951318740845, "num_chars": 2}, {"sum_logits": -1.5295274257659912, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5295274257659912, "logits_per_char": -0.7647637128829956, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 471, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4093436002731323, "incorrect_loss_raw": 1.423750678698222, "correct_loss_per_char": 0.7046718001365662, "incorrect_loss_per_char": 0.711875339349111, "correct_loss_per_token": 1.4093436002731323, "incorrect_loss_per_token": 1.423750678698222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0645086765289307, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -1.0645086765289307, "logits_per_char": -0.5322543382644653, "num_chars": 2}, {"sum_logits": -1.4093436002731323, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.4093436002731323, "logits_per_char": -0.7046718001365662, "num_chars": 2}, {"sum_logits": -1.6909935474395752, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6909935474395752, "logits_per_char": -0.8454967737197876, "num_chars": 2}, {"sum_logits": -1.5157498121261597, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.5157498121261597, "logits_per_char": -0.7578749060630798, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 472, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.650730848312378, "incorrect_loss_raw": 1.377018888791402, "correct_loss_per_char": 0.825365424156189, "incorrect_loss_per_char": 0.688509444395701, "correct_loss_per_token": 1.650730848312378, "incorrect_loss_per_token": 1.377018888791402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.065158724784851, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.065158724784851, "logits_per_char": -0.5325793623924255, "num_chars": 2}, {"sum_logits": -1.2218042612075806, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.2218042612075806, "logits_per_char": -0.6109021306037903, "num_chars": 2}, {"sum_logits": -1.844093680381775, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.844093680381775, "logits_per_char": -0.9220468401908875, "num_chars": 2}, {"sum_logits": -1.650730848312378, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.650730848312378, "logits_per_char": -0.825365424156189, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 473, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6311839818954468, "incorrect_loss_raw": 1.3707026243209839, "correct_loss_per_char": 0.8155919909477234, "incorrect_loss_per_char": 0.6853513121604919, "correct_loss_per_token": 1.6311839818954468, "incorrect_loss_per_token": 1.3707026243209839, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0947436094284058, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.0947436094284058, "logits_per_char": -0.5473718047142029, "num_chars": 2}, {"sum_logits": -1.2284332513809204, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.2284332513809204, "logits_per_char": -0.6142166256904602, "num_chars": 2}, {"sum_logits": -1.7889310121536255, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.7889310121536255, "logits_per_char": -0.8944655060768127, "num_chars": 2}, {"sum_logits": -1.6311839818954468, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.6311839818954468, "logits_per_char": -0.8155919909477234, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 474, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6717908382415771, "incorrect_loss_raw": 1.4188835422197978, "correct_loss_per_char": 0.8358954191207886, "incorrect_loss_per_char": 0.7094417711098989, "correct_loss_per_token": 1.6717908382415771, "incorrect_loss_per_token": 1.4188835422197978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9780811667442322, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -0.9780811667442322, "logits_per_char": -0.4890405833721161, "num_chars": 2}, {"sum_logits": -1.197577953338623, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.197577953338623, "logits_per_char": -0.5987889766693115, "num_chars": 2}, {"sum_logits": -2.080991506576538, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -2.080991506576538, "logits_per_char": -1.040495753288269, "num_chars": 2}, {"sum_logits": -1.6717908382415771, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.6717908382415771, "logits_per_char": -0.8358954191207886, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 475, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7576099634170532, "incorrect_loss_raw": 1.3219650586446126, "correct_loss_per_char": 0.8788049817085266, "incorrect_loss_per_char": 0.6609825293223063, "correct_loss_per_token": 1.7576099634170532, "incorrect_loss_per_token": 1.3219650586446126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1325287818908691, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -1.1325287818908691, "logits_per_char": -0.5662643909454346, "num_chars": 2}, {"sum_logits": -1.2052189111709595, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.2052189111709595, "logits_per_char": -0.6026094555854797, "num_chars": 2}, {"sum_logits": -1.7576099634170532, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.7576099634170532, "logits_per_char": -0.8788049817085266, "num_chars": 2}, {"sum_logits": -1.6281474828720093, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.6281474828720093, "logits_per_char": -0.8140737414360046, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 476, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.018285870552063, "incorrect_loss_raw": 1.6124250491460164, "correct_loss_per_char": 0.5091429352760315, "incorrect_loss_per_char": 0.8062125245730082, "correct_loss_per_token": 1.018285870552063, "incorrect_loss_per_token": 1.6124250491460164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.018285870552063, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.018285870552063, "logits_per_char": -0.5091429352760315, "num_chars": 2}, {"sum_logits": -1.1909542083740234, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.1909542083740234, "logits_per_char": -0.5954771041870117, "num_chars": 2}, {"sum_logits": -1.9519635438919067, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.9519635438919067, "logits_per_char": -0.9759817719459534, "num_chars": 2}, {"sum_logits": -1.6943573951721191, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.6943573951721191, "logits_per_char": -0.8471786975860596, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 477, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0445523262023926, "incorrect_loss_raw": 1.2689285278320312, "correct_loss_per_char": 1.0222761631011963, "incorrect_loss_per_char": 0.6344642639160156, "correct_loss_per_token": 2.0445523262023926, "incorrect_loss_per_token": 1.2689285278320312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0838446617126465, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -1.0838446617126465, "logits_per_char": -0.5419223308563232, "num_chars": 2}, {"sum_logits": -1.1491507291793823, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.1491507291793823, "logits_per_char": -0.5745753645896912, "num_chars": 2}, {"sum_logits": -2.0445523262023926, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -2.0445523262023926, "logits_per_char": -1.0222761631011963, "num_chars": 2}, {"sum_logits": -1.573790192604065, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.573790192604065, "logits_per_char": -0.7868950963020325, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 478, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.563002586364746, "incorrect_loss_raw": 1.414587418238322, "correct_loss_per_char": 0.781501293182373, "incorrect_loss_per_char": 0.707293709119161, "correct_loss_per_token": 1.563002586364746, "incorrect_loss_per_token": 1.414587418238322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1068477630615234, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.1068477630615234, "logits_per_char": -0.5534238815307617, "num_chars": 2}, {"sum_logits": -1.1549921035766602, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.1549921035766602, "logits_per_char": -0.5774960517883301, "num_chars": 2}, {"sum_logits": -1.9819223880767822, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.9819223880767822, "logits_per_char": -0.9909611940383911, "num_chars": 2}, {"sum_logits": -1.563002586364746, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.563002586364746, "logits_per_char": -0.781501293182373, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 479, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4963593482971191, "incorrect_loss_raw": 1.4162228504816692, "correct_loss_per_char": 0.7481796741485596, "incorrect_loss_per_char": 0.7081114252408346, "correct_loss_per_token": 1.4963593482971191, "incorrect_loss_per_token": 1.4162228504816692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1302316188812256, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.1302316188812256, "logits_per_char": -0.5651158094406128, "num_chars": 2}, {"sum_logits": -1.2193976640701294, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2193976640701294, "logits_per_char": -0.6096988320350647, "num_chars": 2}, {"sum_logits": -1.8990392684936523, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.8990392684936523, "logits_per_char": -0.9495196342468262, "num_chars": 2}, {"sum_logits": -1.4963593482971191, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.4963593482971191, "logits_per_char": -0.7481796741485596, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 480, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9310571551322937, "incorrect_loss_raw": 1.66156800587972, "correct_loss_per_char": 0.46552857756614685, "incorrect_loss_per_char": 0.83078400293986, "correct_loss_per_token": 0.9310571551322937, "incorrect_loss_per_token": 1.66156800587972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9310571551322937, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.9310571551322937, "logits_per_char": -0.46552857756614685, "num_chars": 2}, {"sum_logits": -1.2638475894927979, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.2638475894927979, "logits_per_char": -0.6319237947463989, "num_chars": 2}, {"sum_logits": -1.970801830291748, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.970801830291748, "logits_per_char": -0.985400915145874, "num_chars": 2}, {"sum_logits": -1.7500545978546143, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.7500545978546143, "logits_per_char": -0.8750272989273071, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 481, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7631926536560059, "incorrect_loss_raw": 1.3828245401382446, "correct_loss_per_char": 0.8815963268280029, "incorrect_loss_per_char": 0.6914122700691223, "correct_loss_per_token": 1.7631926536560059, "incorrect_loss_per_token": 1.3828245401382446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0214087963104248, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0214087963104248, "logits_per_char": -0.5107043981552124, "num_chars": 2}, {"sum_logits": -1.1251620054244995, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1251620054244995, "logits_per_char": -0.5625810027122498, "num_chars": 2}, {"sum_logits": -2.0019028186798096, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -2.0019028186798096, "logits_per_char": -1.0009514093399048, "num_chars": 2}, {"sum_logits": -1.7631926536560059, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.7631926536560059, "logits_per_char": -0.8815963268280029, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 482, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0217740535736084, "incorrect_loss_raw": 1.6368340651194255, "correct_loss_per_char": 0.5108870267868042, "incorrect_loss_per_char": 0.8184170325597128, "correct_loss_per_token": 1.0217740535736084, "incorrect_loss_per_token": 1.6368340651194255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0217740535736084, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": true, "logits_per_token": -1.0217740535736084, "logits_per_char": -0.5108870267868042, "num_chars": 2}, {"sum_logits": -1.1163444519042969, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.1163444519042969, "logits_per_char": -0.5581722259521484, "num_chars": 2}, {"sum_logits": -2.0696563720703125, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -2.0696563720703125, "logits_per_char": -1.0348281860351562, "num_chars": 2}, {"sum_logits": -1.724501371383667, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.724501371383667, "logits_per_char": -0.8622506856918335, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 483, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.89594304561615, "incorrect_loss_raw": 1.3204580942789714, "correct_loss_per_char": 0.947971522808075, "incorrect_loss_per_char": 0.6602290471394857, "correct_loss_per_token": 1.89594304561615, "incorrect_loss_per_token": 1.3204580942789714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0347405672073364, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": true, "logits_per_token": -1.0347405672073364, "logits_per_char": -0.5173702836036682, "num_chars": 2}, {"sum_logits": -1.1597514152526855, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.1597514152526855, "logits_per_char": -0.5798757076263428, "num_chars": 2}, {"sum_logits": -1.89594304561615, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.89594304561615, "logits_per_char": -0.947971522808075, "num_chars": 2}, {"sum_logits": -1.766882300376892, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.766882300376892, "logits_per_char": -0.883441150188446, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 484, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0888928174972534, "incorrect_loss_raw": 1.63959797223409, "correct_loss_per_char": 0.5444464087486267, "incorrect_loss_per_char": 0.819798986117045, "correct_loss_per_token": 1.0888928174972534, "incorrect_loss_per_token": 1.63959797223409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0888928174972534, "num_tokens": 1, "num_tokens_all": 1167, "is_greedy": false, "logits_per_token": -1.0888928174972534, "logits_per_char": -0.5444464087486267, "num_chars": 2}, {"sum_logits": -0.9742786884307861, "num_tokens": 1, "num_tokens_all": 1167, "is_greedy": true, "logits_per_token": -0.9742786884307861, "logits_per_char": -0.48713934421539307, "num_chars": 2}, {"sum_logits": -1.9680507183074951, "num_tokens": 1, "num_tokens_all": 1167, "is_greedy": false, "logits_per_token": -1.9680507183074951, "logits_per_char": -0.9840253591537476, "num_chars": 2}, {"sum_logits": -1.9764645099639893, "num_tokens": 1, "num_tokens_all": 1167, "is_greedy": false, "logits_per_token": -1.9764645099639893, "logits_per_char": -0.9882322549819946, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 485, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2207456827163696, "incorrect_loss_raw": 1.5074464480082195, "correct_loss_per_char": 0.6103728413581848, "incorrect_loss_per_char": 0.7537232240041097, "correct_loss_per_token": 1.2207456827163696, "incorrect_loss_per_token": 1.5074464480082195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1110529899597168, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.1110529899597168, "logits_per_char": -0.5555264949798584, "num_chars": 2}, {"sum_logits": -1.2207456827163696, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2207456827163696, "logits_per_char": -0.6103728413581848, "num_chars": 2}, {"sum_logits": -1.825782299041748, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.825782299041748, "logits_per_char": -0.912891149520874, "num_chars": 2}, {"sum_logits": -1.5855040550231934, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5855040550231934, "logits_per_char": -0.7927520275115967, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 486, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1797027587890625, "incorrect_loss_raw": 1.5294268131256104, "correct_loss_per_char": 0.5898513793945312, "incorrect_loss_per_char": 0.7647134065628052, "correct_loss_per_token": 1.1797027587890625, "incorrect_loss_per_token": 1.5294268131256104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.097564935684204, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.097564935684204, "logits_per_char": -0.548782467842102, "num_chars": 2}, {"sum_logits": -1.1797027587890625, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.1797027587890625, "logits_per_char": -0.5898513793945312, "num_chars": 2}, {"sum_logits": -1.7823173999786377, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.7823173999786377, "logits_per_char": -0.8911586999893188, "num_chars": 2}, {"sum_logits": -1.7083981037139893, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.7083981037139893, "logits_per_char": -0.8541990518569946, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 487, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8454569578170776, "incorrect_loss_raw": 1.313089370727539, "correct_loss_per_char": 0.9227284789085388, "incorrect_loss_per_char": 0.6565446853637695, "correct_loss_per_token": 1.8454569578170776, "incorrect_loss_per_token": 1.313089370727539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0632383823394775, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.0632383823394775, "logits_per_char": -0.5316191911697388, "num_chars": 2}, {"sum_logits": -1.2040950059890747, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.2040950059890747, "logits_per_char": -0.6020475029945374, "num_chars": 2}, {"sum_logits": -1.8454569578170776, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8454569578170776, "logits_per_char": -0.9227284789085388, "num_chars": 2}, {"sum_logits": -1.671934723854065, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.671934723854065, "logits_per_char": -0.8359673619270325, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 488, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4445736408233643, "incorrect_loss_raw": 1.4041107892990112, "correct_loss_per_char": 0.7222868204116821, "incorrect_loss_per_char": 0.7020553946495056, "correct_loss_per_token": 1.4445736408233643, "incorrect_loss_per_token": 1.4041107892990112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1752231121063232, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.1752231121063232, "logits_per_char": -0.5876115560531616, "num_chars": 2}, {"sum_logits": -1.4445736408233643, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4445736408233643, "logits_per_char": -0.7222868204116821, "num_chars": 2}, {"sum_logits": -1.731317162513733, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.731317162513733, "logits_per_char": -0.8656585812568665, "num_chars": 2}, {"sum_logits": -1.3057920932769775, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.3057920932769775, "logits_per_char": -0.6528960466384888, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 489, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7717745304107666, "incorrect_loss_raw": 1.302835742632548, "correct_loss_per_char": 0.8858872652053833, "incorrect_loss_per_char": 0.651417871316274, "correct_loss_per_token": 1.7717745304107666, "incorrect_loss_per_token": 1.302835742632548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2246500253677368, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.2246500253677368, "logits_per_char": -0.6123250126838684, "num_chars": 2}, {"sum_logits": -1.1831302642822266, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -1.1831302642822266, "logits_per_char": -0.5915651321411133, "num_chars": 2}, {"sum_logits": -1.7717745304107666, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.7717745304107666, "logits_per_char": -0.8858872652053833, "num_chars": 2}, {"sum_logits": -1.5007269382476807, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.5007269382476807, "logits_per_char": -0.7503634691238403, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 490, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.648637294769287, "incorrect_loss_raw": 1.3675359884897869, "correct_loss_per_char": 0.8243186473846436, "incorrect_loss_per_char": 0.6837679942448934, "correct_loss_per_token": 1.648637294769287, "incorrect_loss_per_token": 1.3675359884897869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1089439392089844, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.1089439392089844, "logits_per_char": -0.5544719696044922, "num_chars": 2}, {"sum_logits": -1.1932227611541748, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.1932227611541748, "logits_per_char": -0.5966113805770874, "num_chars": 2}, {"sum_logits": -1.8004412651062012, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.8004412651062012, "logits_per_char": -0.9002206325531006, "num_chars": 2}, {"sum_logits": -1.648637294769287, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.648637294769287, "logits_per_char": -0.8243186473846436, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 491, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3722373247146606, "incorrect_loss_raw": 1.435847004254659, "correct_loss_per_char": 0.6861186623573303, "incorrect_loss_per_char": 0.7179235021273295, "correct_loss_per_token": 1.3722373247146606, "incorrect_loss_per_token": 1.435847004254659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0926382541656494, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -1.0926382541656494, "logits_per_char": -0.5463191270828247, "num_chars": 2}, {"sum_logits": -1.3722373247146606, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.3722373247146606, "logits_per_char": -0.6861186623573303, "num_chars": 2}, {"sum_logits": -1.6613514423370361, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6613514423370361, "logits_per_char": -0.8306757211685181, "num_chars": 2}, {"sum_logits": -1.5535513162612915, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.5535513162612915, "logits_per_char": -0.7767756581306458, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 492, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0215041637420654, "incorrect_loss_raw": 1.5627720355987549, "correct_loss_per_char": 0.5107520818710327, "incorrect_loss_per_char": 0.7813860177993774, "correct_loss_per_token": 1.0215041637420654, "incorrect_loss_per_token": 1.5627720355987549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0215041637420654, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -1.0215041637420654, "logits_per_char": -0.5107520818710327, "num_chars": 2}, {"sum_logits": -1.462325096130371, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.462325096130371, "logits_per_char": -0.7311625480651855, "num_chars": 2}, {"sum_logits": -1.6269304752349854, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.6269304752349854, "logits_per_char": -0.8134652376174927, "num_chars": 2}, {"sum_logits": -1.5990605354309082, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.5990605354309082, "logits_per_char": -0.7995302677154541, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 493, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6865379810333252, "incorrect_loss_raw": 1.3825652599334717, "correct_loss_per_char": 0.8432689905166626, "incorrect_loss_per_char": 0.6912826299667358, "correct_loss_per_token": 1.6865379810333252, "incorrect_loss_per_token": 1.3825652599334717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0111424922943115, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0111424922943115, "logits_per_char": -0.5055712461471558, "num_chars": 2}, {"sum_logits": -1.221491813659668, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.221491813659668, "logits_per_char": -0.610745906829834, "num_chars": 2}, {"sum_logits": -1.9150614738464355, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.9150614738464355, "logits_per_char": -0.9575307369232178, "num_chars": 2}, {"sum_logits": -1.6865379810333252, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.6865379810333252, "logits_per_char": -0.8432689905166626, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 494, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6515377759933472, "incorrect_loss_raw": 1.3761501709620159, "correct_loss_per_char": 0.8257688879966736, "incorrect_loss_per_char": 0.6880750854810079, "correct_loss_per_token": 1.6515377759933472, "incorrect_loss_per_token": 1.3761501709620159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0836482048034668, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -1.0836482048034668, "logits_per_char": -0.5418241024017334, "num_chars": 2}, {"sum_logits": -1.186592698097229, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.186592698097229, "logits_per_char": -0.5932963490486145, "num_chars": 2}, {"sum_logits": -1.8582096099853516, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.8582096099853516, "logits_per_char": -0.9291048049926758, "num_chars": 2}, {"sum_logits": -1.6515377759933472, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.6515377759933472, "logits_per_char": -0.8257688879966736, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 495, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.799445629119873, "incorrect_loss_raw": 1.3102773030598958, "correct_loss_per_char": 0.8997228145599365, "incorrect_loss_per_char": 0.6551386515299479, "correct_loss_per_token": 1.799445629119873, "incorrect_loss_per_token": 1.3102773030598958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0920464992523193, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.0920464992523193, "logits_per_char": -0.5460232496261597, "num_chars": 2}, {"sum_logits": -1.2646160125732422, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2646160125732422, "logits_per_char": -0.6323080062866211, "num_chars": 2}, {"sum_logits": -1.799445629119873, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.799445629119873, "logits_per_char": -0.8997228145599365, "num_chars": 2}, {"sum_logits": -1.574169397354126, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.574169397354126, "logits_per_char": -0.787084698677063, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 496, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.326094150543213, "incorrect_loss_raw": 1.4739963610967, "correct_loss_per_char": 0.6630470752716064, "incorrect_loss_per_char": 0.73699818054835, "correct_loss_per_token": 1.326094150543213, "incorrect_loss_per_token": 1.4739963610967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0275399684906006, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.0275399684906006, "logits_per_char": -0.5137699842453003, "num_chars": 2}, {"sum_logits": -1.326094150543213, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.326094150543213, "logits_per_char": -0.6630470752716064, "num_chars": 2}, {"sum_logits": -1.7743208408355713, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.7743208408355713, "logits_per_char": -0.8871604204177856, "num_chars": 2}, {"sum_logits": -1.6201282739639282, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.6201282739639282, "logits_per_char": -0.8100641369819641, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 497, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9494608640670776, "incorrect_loss_raw": 1.4187588294347127, "correct_loss_per_char": 0.9747304320335388, "incorrect_loss_per_char": 0.7093794147173563, "correct_loss_per_token": 1.9494608640670776, "incorrect_loss_per_token": 1.4187588294347127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8506914377212524, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": true, "logits_per_token": -0.8506914377212524, "logits_per_char": -0.4253457188606262, "num_chars": 2}, {"sum_logits": -1.1640160083770752, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.1640160083770752, "logits_per_char": -0.5820080041885376, "num_chars": 2}, {"sum_logits": -2.2415690422058105, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -2.2415690422058105, "logits_per_char": -1.1207845211029053, "num_chars": 2}, {"sum_logits": -1.9494608640670776, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.9494608640670776, "logits_per_char": -0.9747304320335388, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 498, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2418365478515625, "incorrect_loss_raw": 1.5100726286570232, "correct_loss_per_char": 0.6209182739257812, "incorrect_loss_per_char": 0.7550363143285116, "correct_loss_per_token": 1.2418365478515625, "incorrect_loss_per_token": 1.5100726286570232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0508780479431152, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -1.0508780479431152, "logits_per_char": -0.5254390239715576, "num_chars": 2}, {"sum_logits": -1.2418365478515625, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.2418365478515625, "logits_per_char": -0.6209182739257812, "num_chars": 2}, {"sum_logits": -1.8100075721740723, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.8100075721740723, "logits_per_char": -0.9050037860870361, "num_chars": 2}, {"sum_logits": -1.6693322658538818, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.6693322658538818, "logits_per_char": -0.8346661329269409, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 499, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.205081820487976, "incorrect_loss_raw": 1.4901415904362996, "correct_loss_per_char": 0.602540910243988, "incorrect_loss_per_char": 0.7450707952181498, "correct_loss_per_token": 1.205081820487976, "incorrect_loss_per_token": 1.4901415904362996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.205081820487976, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -1.205081820487976, "logits_per_char": -0.602540910243988, "num_chars": 2}, {"sum_logits": -1.226643443107605, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.226643443107605, "logits_per_char": -0.6133217215538025, "num_chars": 2}, {"sum_logits": -1.7818818092346191, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.7818818092346191, "logits_per_char": -0.8909409046173096, "num_chars": 2}, {"sum_logits": -1.4618995189666748, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.4618995189666748, "logits_per_char": -0.7309497594833374, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 500, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5747275352478027, "incorrect_loss_raw": 1.3731240431467693, "correct_loss_per_char": 0.7873637676239014, "incorrect_loss_per_char": 0.6865620215733846, "correct_loss_per_token": 1.5747275352478027, "incorrect_loss_per_token": 1.3731240431467693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1492952108383179, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -1.1492952108383179, "logits_per_char": -0.5746476054191589, "num_chars": 2}, {"sum_logits": -1.2299169301986694, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.2299169301986694, "logits_per_char": -0.6149584650993347, "num_chars": 2}, {"sum_logits": -1.7401599884033203, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.7401599884033203, "logits_per_char": -0.8700799942016602, "num_chars": 2}, {"sum_logits": -1.5747275352478027, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.5747275352478027, "logits_per_char": -0.7873637676239014, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 501, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.181457757949829, "incorrect_loss_raw": 1.5020888249079387, "correct_loss_per_char": 0.5907288789749146, "incorrect_loss_per_char": 0.7510444124539694, "correct_loss_per_token": 1.181457757949829, "incorrect_loss_per_token": 1.5020888249079387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.181457757949829, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.181457757949829, "logits_per_char": -0.5907288789749146, "num_chars": 2}, {"sum_logits": -1.2109332084655762, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2109332084655762, "logits_per_char": -0.6054666042327881, "num_chars": 2}, {"sum_logits": -1.7641297578811646, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.7641297578811646, "logits_per_char": -0.8820648789405823, "num_chars": 2}, {"sum_logits": -1.5312035083770752, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5312035083770752, "logits_per_char": -0.7656017541885376, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 502, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1898987293243408, "incorrect_loss_raw": 1.508166988690694, "correct_loss_per_char": 0.5949493646621704, "incorrect_loss_per_char": 0.754083494345347, "correct_loss_per_token": 1.1898987293243408, "incorrect_loss_per_token": 1.508166988690694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1582804918289185, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -1.1582804918289185, "logits_per_char": -0.5791402459144592, "num_chars": 2}, {"sum_logits": -1.1898987293243408, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.1898987293243408, "logits_per_char": -0.5949493646621704, "num_chars": 2}, {"sum_logits": -1.8064409494400024, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.8064409494400024, "logits_per_char": -0.9032204747200012, "num_chars": 2}, {"sum_logits": -1.5597795248031616, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.5597795248031616, "logits_per_char": -0.7798897624015808, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 503, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3852319717407227, "incorrect_loss_raw": 1.4249412218729656, "correct_loss_per_char": 0.6926159858703613, "incorrect_loss_per_char": 0.7124706109364828, "correct_loss_per_token": 1.3852319717407227, "incorrect_loss_per_token": 1.4249412218729656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.155346155166626, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -1.155346155166626, "logits_per_char": -0.577673077583313, "num_chars": 2}, {"sum_logits": -1.3852319717407227, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.3852319717407227, "logits_per_char": -0.6926159858703613, "num_chars": 2}, {"sum_logits": -1.7512613534927368, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.7512613534927368, "logits_per_char": -0.8756306767463684, "num_chars": 2}, {"sum_logits": -1.3682161569595337, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.3682161569595337, "logits_per_char": -0.6841080784797668, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 504, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5248510837554932, "incorrect_loss_raw": 1.376441240310669, "correct_loss_per_char": 0.7624255418777466, "incorrect_loss_per_char": 0.6882206201553345, "correct_loss_per_token": 1.5248510837554932, "incorrect_loss_per_token": 1.376441240310669, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1186622381210327, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.1186622381210327, "logits_per_char": -0.5593311190605164, "num_chars": 2}, {"sum_logits": -1.3688442707061768, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3688442707061768, "logits_per_char": -0.6844221353530884, "num_chars": 2}, {"sum_logits": -1.6418172121047974, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.6418172121047974, "logits_per_char": -0.8209086060523987, "num_chars": 2}, {"sum_logits": -1.5248510837554932, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.5248510837554932, "logits_per_char": -0.7624255418777466, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 505, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.095742106437683, "incorrect_loss_raw": 1.5520412921905518, "correct_loss_per_char": 0.5478710532188416, "incorrect_loss_per_char": 0.7760206460952759, "correct_loss_per_token": 1.095742106437683, "incorrect_loss_per_token": 1.5520412921905518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.095742106437683, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -1.095742106437683, "logits_per_char": -0.5478710532188416, "num_chars": 2}, {"sum_logits": -1.2230775356292725, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.2230775356292725, "logits_per_char": -0.6115387678146362, "num_chars": 2}, {"sum_logits": -1.849649429321289, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.849649429321289, "logits_per_char": -0.9248247146606445, "num_chars": 2}, {"sum_logits": -1.5833969116210938, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.5833969116210938, "logits_per_char": -0.7916984558105469, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 506, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1685987710952759, "incorrect_loss_raw": 1.56804092725118, "correct_loss_per_char": 0.5842993855476379, "incorrect_loss_per_char": 0.78402046362559, "correct_loss_per_token": 1.1685987710952759, "incorrect_loss_per_token": 1.56804092725118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0250730514526367, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": true, "logits_per_token": -1.0250730514526367, "logits_per_char": -0.5125365257263184, "num_chars": 2}, {"sum_logits": -1.1685987710952759, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.1685987710952759, "logits_per_char": -0.5842993855476379, "num_chars": 2}, {"sum_logits": -1.9654971361160278, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.9654971361160278, "logits_per_char": -0.9827485680580139, "num_chars": 2}, {"sum_logits": -1.7135525941848755, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.7135525941848755, "logits_per_char": -0.8567762970924377, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 507, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8879588842391968, "incorrect_loss_raw": 1.3037211100260417, "correct_loss_per_char": 0.9439794421195984, "incorrect_loss_per_char": 0.6518605550130209, "correct_loss_per_token": 1.8879588842391968, "incorrect_loss_per_token": 1.3037211100260417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0710924863815308, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0710924863815308, "logits_per_char": -0.5355462431907654, "num_chars": 2}, {"sum_logits": -1.2169265747070312, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.2169265747070312, "logits_per_char": -0.6084632873535156, "num_chars": 2}, {"sum_logits": -1.8879588842391968, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.8879588842391968, "logits_per_char": -0.9439794421195984, "num_chars": 2}, {"sum_logits": -1.623144268989563, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.623144268989563, "logits_per_char": -0.8115721344947815, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 508, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.082472324371338, "incorrect_loss_raw": 1.2714087963104248, "correct_loss_per_char": 1.041236162185669, "incorrect_loss_per_char": 0.6357043981552124, "correct_loss_per_token": 2.082472324371338, "incorrect_loss_per_token": 1.2714087963104248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.117423176765442, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.117423176765442, "logits_per_char": -0.558711588382721, "num_chars": 2}, {"sum_logits": -1.083656907081604, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": true, "logits_per_token": -1.083656907081604, "logits_per_char": -0.541828453540802, "num_chars": 2}, {"sum_logits": -2.082472324371338, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -2.082472324371338, "logits_per_char": -1.041236162185669, "num_chars": 2}, {"sum_logits": -1.6131463050842285, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.6131463050842285, "logits_per_char": -0.8065731525421143, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 509, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9861894845962524, "incorrect_loss_raw": 1.27728267510732, "correct_loss_per_char": 0.9930947422981262, "incorrect_loss_per_char": 0.63864133755366, "correct_loss_per_token": 1.9861894845962524, "incorrect_loss_per_token": 1.27728267510732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1534560918807983, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.1534560918807983, "logits_per_char": -0.5767280459403992, "num_chars": 2}, {"sum_logits": -1.095729112625122, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.095729112625122, "logits_per_char": -0.547864556312561, "num_chars": 2}, {"sum_logits": -1.9861894845962524, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.9861894845962524, "logits_per_char": -0.9930947422981262, "num_chars": 2}, {"sum_logits": -1.58266282081604, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.58266282081604, "logits_per_char": -0.79133141040802, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 510, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.10532546043396, "incorrect_loss_raw": 1.5694592396418254, "correct_loss_per_char": 0.55266273021698, "incorrect_loss_per_char": 0.7847296198209127, "correct_loss_per_token": 1.10532546043396, "incorrect_loss_per_token": 1.5694592396418254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.10532546043396, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.10532546043396, "logits_per_char": -0.55266273021698, "num_chars": 2}, {"sum_logits": -1.1421468257904053, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.1421468257904053, "logits_per_char": -0.5710734128952026, "num_chars": 2}, {"sum_logits": -1.9540340900421143, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.9540340900421143, "logits_per_char": -0.9770170450210571, "num_chars": 2}, {"sum_logits": -1.6121968030929565, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.6121968030929565, "logits_per_char": -0.8060984015464783, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 511, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9858964681625366, "incorrect_loss_raw": 1.2803536653518677, "correct_loss_per_char": 0.9929482340812683, "incorrect_loss_per_char": 0.6401768326759338, "correct_loss_per_token": 1.9858964681625366, "incorrect_loss_per_token": 1.2803536653518677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0309633016586304, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0309633016586304, "logits_per_char": -0.5154816508293152, "num_chars": 2}, {"sum_logits": -1.2372031211853027, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2372031211853027, "logits_per_char": -0.6186015605926514, "num_chars": 2}, {"sum_logits": -1.9858964681625366, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.9858964681625366, "logits_per_char": -0.9929482340812683, "num_chars": 2}, {"sum_logits": -1.57289457321167, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.57289457321167, "logits_per_char": -0.786447286605835, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 512, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.861449122428894, "incorrect_loss_raw": 1.2986271778742473, "correct_loss_per_char": 0.930724561214447, "incorrect_loss_per_char": 0.6493135889371237, "correct_loss_per_token": 1.861449122428894, "incorrect_loss_per_token": 1.2986271778742473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0975489616394043, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -1.0975489616394043, "logits_per_char": -0.5487744808197021, "num_chars": 2}, {"sum_logits": -1.1945682764053345, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.1945682764053345, "logits_per_char": -0.5972841382026672, "num_chars": 2}, {"sum_logits": -1.861449122428894, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.861449122428894, "logits_per_char": -0.930724561214447, "num_chars": 2}, {"sum_logits": -1.603764295578003, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.603764295578003, "logits_per_char": -0.8018821477890015, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 513, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1861865520477295, "incorrect_loss_raw": 1.5674537817637126, "correct_loss_per_char": 0.5930932760238647, "incorrect_loss_per_char": 0.7837268908818563, "correct_loss_per_token": 1.1861865520477295, "incorrect_loss_per_token": 1.5674537817637126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.013728141784668, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -1.013728141784668, "logits_per_char": -0.506864070892334, "num_chars": 2}, {"sum_logits": -1.1861865520477295, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.1861865520477295, "logits_per_char": -0.5930932760238647, "num_chars": 2}, {"sum_logits": -2.066831588745117, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -2.066831588745117, "logits_per_char": -1.0334157943725586, "num_chars": 2}, {"sum_logits": -1.6218016147613525, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.6218016147613525, "logits_per_char": -0.8109008073806763, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 514, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.206527590751648, "incorrect_loss_raw": 1.5212557315826416, "correct_loss_per_char": 0.603263795375824, "incorrect_loss_per_char": 0.7606278657913208, "correct_loss_per_token": 1.206527590751648, "incorrect_loss_per_token": 1.5212557315826416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0678577423095703, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.0678577423095703, "logits_per_char": -0.5339288711547852, "num_chars": 2}, {"sum_logits": -1.206527590751648, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.206527590751648, "logits_per_char": -0.603263795375824, "num_chars": 2}, {"sum_logits": -1.7768785953521729, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7768785953521729, "logits_per_char": -0.8884392976760864, "num_chars": 2}, {"sum_logits": -1.7190308570861816, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7190308570861816, "logits_per_char": -0.8595154285430908, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 515, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5701290369033813, "incorrect_loss_raw": 1.4111970663070679, "correct_loss_per_char": 0.7850645184516907, "incorrect_loss_per_char": 0.7055985331535339, "correct_loss_per_token": 1.5701290369033813, "incorrect_loss_per_token": 1.4111970663070679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1335172653198242, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -1.1335172653198242, "logits_per_char": -0.5667586326599121, "num_chars": 2}, {"sum_logits": -1.1501240730285645, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.1501240730285645, "logits_per_char": -0.5750620365142822, "num_chars": 2}, {"sum_logits": -1.949949860572815, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.949949860572815, "logits_per_char": -0.9749749302864075, "num_chars": 2}, {"sum_logits": -1.5701290369033813, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.5701290369033813, "logits_per_char": -0.7850645184516907, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 516, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6453497409820557, "incorrect_loss_raw": 1.3263644377390544, "correct_loss_per_char": 0.8226748704910278, "incorrect_loss_per_char": 0.6631822188695272, "correct_loss_per_token": 1.6453497409820557, "incorrect_loss_per_token": 1.3263644377390544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1998900175094604, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.1998900175094604, "logits_per_char": -0.5999450087547302, "num_chars": 2}, {"sum_logits": -1.4496634006500244, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4496634006500244, "logits_per_char": -0.7248317003250122, "num_chars": 2}, {"sum_logits": -1.6453497409820557, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.6453497409820557, "logits_per_char": -0.8226748704910278, "num_chars": 2}, {"sum_logits": -1.3295398950576782, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3295398950576782, "logits_per_char": -0.6647699475288391, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 517, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2918338775634766, "incorrect_loss_raw": 1.513485272725423, "correct_loss_per_char": 0.6459169387817383, "incorrect_loss_per_char": 0.7567426363627116, "correct_loss_per_token": 1.2918338775634766, "incorrect_loss_per_token": 1.513485272725423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9775402545928955, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.9775402545928955, "logits_per_char": -0.48877012729644775, "num_chars": 2}, {"sum_logits": -1.2918338775634766, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.2918338775634766, "logits_per_char": -0.6459169387817383, "num_chars": 2}, {"sum_logits": -1.757150650024414, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.757150650024414, "logits_per_char": -0.878575325012207, "num_chars": 2}, {"sum_logits": -1.80576491355896, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.80576491355896, "logits_per_char": -0.90288245677948, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 518, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1931390762329102, "incorrect_loss_raw": 1.4916906754175823, "correct_loss_per_char": 0.5965695381164551, "incorrect_loss_per_char": 0.7458453377087911, "correct_loss_per_token": 1.1931390762329102, "incorrect_loss_per_token": 1.4916906754175823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2374107837677002, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.2374107837677002, "logits_per_char": -0.6187053918838501, "num_chars": 2}, {"sum_logits": -1.1931390762329102, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -1.1931390762329102, "logits_per_char": -0.5965695381164551, "num_chars": 2}, {"sum_logits": -1.7205138206481934, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.7205138206481934, "logits_per_char": -0.8602569103240967, "num_chars": 2}, {"sum_logits": -1.517147421836853, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.517147421836853, "logits_per_char": -0.7585737109184265, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 519, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1666921377182007, "incorrect_loss_raw": 1.5675781567891438, "correct_loss_per_char": 0.5833460688591003, "incorrect_loss_per_char": 0.7837890783945719, "correct_loss_per_token": 1.1666921377182007, "incorrect_loss_per_token": 1.5675781567891438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.063216209411621, "num_tokens": 1, "num_tokens_all": 1132, "is_greedy": true, "logits_per_token": -1.063216209411621, "logits_per_char": -0.5316081047058105, "num_chars": 2}, {"sum_logits": -1.1666921377182007, "num_tokens": 1, "num_tokens_all": 1132, "is_greedy": false, "logits_per_token": -1.1666921377182007, "logits_per_char": -0.5833460688591003, "num_chars": 2}, {"sum_logits": -2.0335233211517334, "num_tokens": 1, "num_tokens_all": 1132, "is_greedy": false, "logits_per_token": -2.0335233211517334, "logits_per_char": -1.0167616605758667, "num_chars": 2}, {"sum_logits": -1.6059949398040771, "num_tokens": 1, "num_tokens_all": 1132, "is_greedy": false, "logits_per_token": -1.6059949398040771, "logits_per_char": -0.8029974699020386, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 520, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.019872784614563, "incorrect_loss_raw": 1.5698470671971638, "correct_loss_per_char": 0.5099363923072815, "incorrect_loss_per_char": 0.7849235335985819, "correct_loss_per_token": 1.019872784614563, "incorrect_loss_per_token": 1.5698470671971638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.019872784614563, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -1.019872784614563, "logits_per_char": -0.5099363923072815, "num_chars": 2}, {"sum_logits": -1.3889288902282715, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.3889288902282715, "logits_per_char": -0.6944644451141357, "num_chars": 2}, {"sum_logits": -1.7683781385421753, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.7683781385421753, "logits_per_char": -0.8841890692710876, "num_chars": 2}, {"sum_logits": -1.552234172821045, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.552234172821045, "logits_per_char": -0.7761170864105225, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 521, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4790595769882202, "incorrect_loss_raw": 1.3893896341323853, "correct_loss_per_char": 0.7395297884941101, "incorrect_loss_per_char": 0.6946948170661926, "correct_loss_per_token": 1.4790595769882202, "incorrect_loss_per_token": 1.3893896341323853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1389285326004028, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -1.1389285326004028, "logits_per_char": -0.5694642663002014, "num_chars": 2}, {"sum_logits": -1.3627842664718628, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.3627842664718628, "logits_per_char": -0.6813921332359314, "num_chars": 2}, {"sum_logits": -1.6664561033248901, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6664561033248901, "logits_per_char": -0.8332280516624451, "num_chars": 2}, {"sum_logits": -1.4790595769882202, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.4790595769882202, "logits_per_char": -0.7395297884941101, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 522, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4246433973312378, "incorrect_loss_raw": 1.4202558199564617, "correct_loss_per_char": 0.7123216986656189, "incorrect_loss_per_char": 0.7101279099782308, "correct_loss_per_token": 1.4246433973312378, "incorrect_loss_per_token": 1.4202558199564617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0982428789138794, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.0982428789138794, "logits_per_char": -0.5491214394569397, "num_chars": 2}, {"sum_logits": -1.4246433973312378, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4246433973312378, "logits_per_char": -0.7123216986656189, "num_chars": 2}, {"sum_logits": -1.7538982629776, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.7538982629776, "logits_per_char": -0.8769491314888, "num_chars": 2}, {"sum_logits": -1.4086263179779053, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4086263179779053, "logits_per_char": -0.7043131589889526, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 523, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1914957761764526, "incorrect_loss_raw": 1.5545745293299358, "correct_loss_per_char": 0.5957478880882263, "incorrect_loss_per_char": 0.7772872646649679, "correct_loss_per_token": 1.1914957761764526, "incorrect_loss_per_token": 1.5545745293299358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.043504238128662, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": true, "logits_per_token": -1.043504238128662, "logits_per_char": -0.521752119064331, "num_chars": 2}, {"sum_logits": -1.1914957761764526, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.1914957761764526, "logits_per_char": -0.5957478880882263, "num_chars": 2}, {"sum_logits": -2.0402493476867676, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -2.0402493476867676, "logits_per_char": -1.0201246738433838, "num_chars": 2}, {"sum_logits": -1.5799700021743774, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.5799700021743774, "logits_per_char": -0.7899850010871887, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 524, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.206419825553894, "incorrect_loss_raw": 1.505806803703308, "correct_loss_per_char": 0.603209912776947, "incorrect_loss_per_char": 0.752903401851654, "correct_loss_per_token": 1.206419825553894, "incorrect_loss_per_token": 1.505806803703308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1322996616363525, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.1322996616363525, "logits_per_char": -0.5661498308181763, "num_chars": 2}, {"sum_logits": -1.206419825553894, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.206419825553894, "logits_per_char": -0.603209912776947, "num_chars": 2}, {"sum_logits": -1.7706657648086548, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.7706657648086548, "logits_per_char": -0.8853328824043274, "num_chars": 2}, {"sum_logits": -1.614454984664917, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.614454984664917, "logits_per_char": -0.8072274923324585, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 525, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4130607843399048, "incorrect_loss_raw": 1.4235819180806477, "correct_loss_per_char": 0.7065303921699524, "incorrect_loss_per_char": 0.7117909590403239, "correct_loss_per_token": 1.4130607843399048, "incorrect_loss_per_token": 1.4235819180806477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0549767017364502, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -1.0549767017364502, "logits_per_char": -0.5274883508682251, "num_chars": 2}, {"sum_logits": -1.4130607843399048, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.4130607843399048, "logits_per_char": -0.7065303921699524, "num_chars": 2}, {"sum_logits": -1.672620415687561, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.672620415687561, "logits_per_char": -0.8363102078437805, "num_chars": 2}, {"sum_logits": -1.5431486368179321, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.5431486368179321, "logits_per_char": -0.7715743184089661, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 526, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2943696975708008, "incorrect_loss_raw": 1.5226975282033284, "correct_loss_per_char": 0.6471848487854004, "incorrect_loss_per_char": 0.7613487641016642, "correct_loss_per_token": 1.2943696975708008, "incorrect_loss_per_token": 1.5226975282033284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9554260969161987, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.9554260969161987, "logits_per_char": -0.47771304845809937, "num_chars": 2}, {"sum_logits": -1.2943696975708008, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.2943696975708008, "logits_per_char": -0.6471848487854004, "num_chars": 2}, {"sum_logits": -1.986834168434143, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.986834168434143, "logits_per_char": -0.9934170842170715, "num_chars": 2}, {"sum_logits": -1.6258323192596436, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.6258323192596436, "logits_per_char": -0.8129161596298218, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 527, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7381876707077026, "incorrect_loss_raw": 1.3054072459538777, "correct_loss_per_char": 0.8690938353538513, "incorrect_loss_per_char": 0.6527036229769388, "correct_loss_per_token": 1.7381876707077026, "incorrect_loss_per_token": 1.3054072459538777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1636290550231934, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -1.1636290550231934, "logits_per_char": -0.5818145275115967, "num_chars": 2}, {"sum_logits": -1.3424780368804932, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.3424780368804932, "logits_per_char": -0.6712390184402466, "num_chars": 2}, {"sum_logits": -1.7381876707077026, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.7381876707077026, "logits_per_char": -0.8690938353538513, "num_chars": 2}, {"sum_logits": -1.4101146459579468, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.4101146459579468, "logits_per_char": -0.7050573229789734, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 528, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3048889636993408, "incorrect_loss_raw": 1.5104804436365764, "correct_loss_per_char": 0.6524444818496704, "incorrect_loss_per_char": 0.7552402218182882, "correct_loss_per_token": 1.3048889636993408, "incorrect_loss_per_token": 1.5104804436365764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9880013465881348, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -0.9880013465881348, "logits_per_char": -0.4940006732940674, "num_chars": 2}, {"sum_logits": -1.3048889636993408, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.3048889636993408, "logits_per_char": -0.6524444818496704, "num_chars": 2}, {"sum_logits": -1.9652596712112427, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.9652596712112427, "logits_per_char": -0.9826298356056213, "num_chars": 2}, {"sum_logits": -1.5781803131103516, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.5781803131103516, "logits_per_char": -0.7890901565551758, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 529, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1568092107772827, "incorrect_loss_raw": 1.4976564248402913, "correct_loss_per_char": 0.5784046053886414, "incorrect_loss_per_char": 0.7488282124201456, "correct_loss_per_token": 1.1568092107772827, "incorrect_loss_per_token": 1.4976564248402913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1568092107772827, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -1.1568092107772827, "logits_per_char": -0.5784046053886414, "num_chars": 2}, {"sum_logits": -1.2911036014556885, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.2911036014556885, "logits_per_char": -0.6455518007278442, "num_chars": 2}, {"sum_logits": -1.6382442712783813, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.6382442712783813, "logits_per_char": -0.8191221356391907, "num_chars": 2}, {"sum_logits": -1.5636214017868042, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.5636214017868042, "logits_per_char": -0.7818107008934021, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 530, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1291890144348145, "incorrect_loss_raw": 1.566121021906535, "correct_loss_per_char": 0.5645945072174072, "incorrect_loss_per_char": 0.7830605109532675, "correct_loss_per_token": 1.1291890144348145, "incorrect_loss_per_token": 1.566121021906535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1291890144348145, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.1291890144348145, "logits_per_char": -0.5645945072174072, "num_chars": 2}, {"sum_logits": -1.1268510818481445, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -1.1268510818481445, "logits_per_char": -0.5634255409240723, "num_chars": 2}, {"sum_logits": -1.9921929836273193, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.9921929836273193, "logits_per_char": -0.9960964918136597, "num_chars": 2}, {"sum_logits": -1.5793190002441406, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.5793190002441406, "logits_per_char": -0.7896595001220703, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 531, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1960326433181763, "incorrect_loss_raw": 1.5230494737625122, "correct_loss_per_char": 0.5980163216590881, "incorrect_loss_per_char": 0.7615247368812561, "correct_loss_per_token": 1.1960326433181763, "incorrect_loss_per_token": 1.5230494737625122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1144294738769531, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.1144294738769531, "logits_per_char": -0.5572147369384766, "num_chars": 2}, {"sum_logits": -1.1960326433181763, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.1960326433181763, "logits_per_char": -0.5980163216590881, "num_chars": 2}, {"sum_logits": -1.9060654640197754, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.9060654640197754, "logits_per_char": -0.9530327320098877, "num_chars": 2}, {"sum_logits": -1.548653483390808, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.548653483390808, "logits_per_char": -0.774326741695404, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 532, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.159542202949524, "incorrect_loss_raw": 1.5390653610229492, "correct_loss_per_char": 0.579771101474762, "incorrect_loss_per_char": 0.7695326805114746, "correct_loss_per_token": 1.159542202949524, "incorrect_loss_per_token": 1.5390653610229492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.159542202949524, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.159542202949524, "logits_per_char": -0.579771101474762, "num_chars": 2}, {"sum_logits": -1.1226999759674072, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.1226999759674072, "logits_per_char": -0.5613499879837036, "num_chars": 2}, {"sum_logits": -1.9092015027999878, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.9092015027999878, "logits_per_char": -0.9546007513999939, "num_chars": 2}, {"sum_logits": -1.5852946043014526, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5852946043014526, "logits_per_char": -0.7926473021507263, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 533, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9125487804412842, "incorrect_loss_raw": 1.2843972047170003, "correct_loss_per_char": 0.9562743902206421, "incorrect_loss_per_char": 0.6421986023585001, "correct_loss_per_token": 1.9125487804412842, "incorrect_loss_per_token": 1.2843972047170003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1158339977264404, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.1158339977264404, "logits_per_char": -0.5579169988632202, "num_chars": 2}, {"sum_logits": -1.1925402879714966, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.1925402879714966, "logits_per_char": -0.5962701439857483, "num_chars": 2}, {"sum_logits": -1.9125487804412842, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.9125487804412842, "logits_per_char": -0.9562743902206421, "num_chars": 2}, {"sum_logits": -1.544817328453064, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.544817328453064, "logits_per_char": -0.772408664226532, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 534, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3123835325241089, "incorrect_loss_raw": 1.440976619720459, "correct_loss_per_char": 0.6561917662620544, "incorrect_loss_per_char": 0.7204883098602295, "correct_loss_per_token": 1.3123835325241089, "incorrect_loss_per_token": 1.440976619720459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3123835325241089, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.3123835325241089, "logits_per_char": -0.6561917662620544, "num_chars": 2}, {"sum_logits": -1.2277860641479492, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -1.2277860641479492, "logits_per_char": -0.6138930320739746, "num_chars": 2}, {"sum_logits": -1.6870115995407104, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6870115995407104, "logits_per_char": -0.8435057997703552, "num_chars": 2}, {"sum_logits": -1.4081321954727173, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.4081321954727173, "logits_per_char": -0.7040660977363586, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 535, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8112918138504028, "incorrect_loss_raw": 1.4072727759679158, "correct_loss_per_char": 0.9056459069252014, "incorrect_loss_per_char": 0.7036363879839579, "correct_loss_per_token": 1.8112918138504028, "incorrect_loss_per_token": 1.4072727759679158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8744291067123413, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.8744291067123413, "logits_per_char": -0.43721455335617065, "num_chars": 2}, {"sum_logits": -1.2458739280700684, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2458739280700684, "logits_per_char": -0.6229369640350342, "num_chars": 2}, {"sum_logits": -2.101515293121338, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -2.101515293121338, "logits_per_char": -1.050757646560669, "num_chars": 2}, {"sum_logits": -1.8112918138504028, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.8112918138504028, "logits_per_char": -0.9056459069252014, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 536, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.614752173423767, "incorrect_loss_raw": 1.3732747634251912, "correct_loss_per_char": 0.8073760867118835, "incorrect_loss_per_char": 0.6866373817125956, "correct_loss_per_token": 1.614752173423767, "incorrect_loss_per_token": 1.3732747634251912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1748536825180054, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.1748536825180054, "logits_per_char": -0.5874268412590027, "num_chars": 2}, {"sum_logits": -1.1413631439208984, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -1.1413631439208984, "logits_per_char": -0.5706815719604492, "num_chars": 2}, {"sum_logits": -1.80360746383667, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.80360746383667, "logits_per_char": -0.901803731918335, "num_chars": 2}, {"sum_logits": -1.614752173423767, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.614752173423767, "logits_per_char": -0.8073760867118835, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 537, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1389732360839844, "incorrect_loss_raw": 1.5212434927622478, "correct_loss_per_char": 0.5694866180419922, "incorrect_loss_per_char": 0.7606217463811239, "correct_loss_per_token": 1.1389732360839844, "incorrect_loss_per_token": 1.5212434927622478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1389732360839844, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.1389732360839844, "logits_per_char": -0.5694866180419922, "num_chars": 2}, {"sum_logits": -1.2535415887832642, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.2535415887832642, "logits_per_char": -0.6267707943916321, "num_chars": 2}, {"sum_logits": -1.7980343103408813, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.7980343103408813, "logits_per_char": -0.8990171551704407, "num_chars": 2}, {"sum_logits": -1.5121545791625977, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.5121545791625977, "logits_per_char": -0.7560772895812988, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 538, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0624661445617676, "incorrect_loss_raw": 1.3051112691561382, "correct_loss_per_char": 1.0312330722808838, "incorrect_loss_per_char": 0.6525556345780691, "correct_loss_per_token": 2.0624661445617676, "incorrect_loss_per_token": 1.3051112691561382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9717116951942444, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": true, "logits_per_token": -0.9717116951942444, "logits_per_char": -0.4858558475971222, "num_chars": 2}, {"sum_logits": -1.1306967735290527, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.1306967735290527, "logits_per_char": -0.5653483867645264, "num_chars": 2}, {"sum_logits": -2.0624661445617676, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -2.0624661445617676, "logits_per_char": -1.0312330722808838, "num_chars": 2}, {"sum_logits": -1.8129253387451172, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.8129253387451172, "logits_per_char": -0.9064626693725586, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 539, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7668582201004028, "incorrect_loss_raw": 1.2990222374598186, "correct_loss_per_char": 0.8834291100502014, "incorrect_loss_per_char": 0.6495111187299093, "correct_loss_per_token": 1.7668582201004028, "incorrect_loss_per_token": 1.2990222374598186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2105903625488281, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.2105903625488281, "logits_per_char": -0.6052951812744141, "num_chars": 2}, {"sum_logits": -1.2699838876724243, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2699838876724243, "logits_per_char": -0.6349919438362122, "num_chars": 2}, {"sum_logits": -1.7668582201004028, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.7668582201004028, "logits_per_char": -0.8834291100502014, "num_chars": 2}, {"sum_logits": -1.4164924621582031, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.4164924621582031, "logits_per_char": -0.7082462310791016, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 540, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410248041152954, "incorrect_loss_raw": 1.414097547531128, "correct_loss_per_char": 0.705124020576477, "incorrect_loss_per_char": 0.707048773765564, "correct_loss_per_token": 1.410248041152954, "incorrect_loss_per_token": 1.414097547531128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1173114776611328, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -1.1173114776611328, "logits_per_char": -0.5586557388305664, "num_chars": 2}, {"sum_logits": -1.4433197975158691, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.4433197975158691, "logits_per_char": -0.7216598987579346, "num_chars": 2}, {"sum_logits": -1.6816613674163818, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6816613674163818, "logits_per_char": -0.8408306837081909, "num_chars": 2}, {"sum_logits": -1.410248041152954, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.410248041152954, "logits_per_char": -0.705124020576477, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 541, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0840833187103271, "incorrect_loss_raw": 1.5335476398468018, "correct_loss_per_char": 0.5420416593551636, "incorrect_loss_per_char": 0.7667738199234009, "correct_loss_per_token": 1.0840833187103271, "incorrect_loss_per_token": 1.5335476398468018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0840833187103271, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -1.0840833187103271, "logits_per_char": -0.5420416593551636, "num_chars": 2}, {"sum_logits": -1.3709065914154053, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.3709065914154053, "logits_per_char": -0.6854532957077026, "num_chars": 2}, {"sum_logits": -1.7022051811218262, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.7022051811218262, "logits_per_char": -0.8511025905609131, "num_chars": 2}, {"sum_logits": -1.5275311470031738, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.5275311470031738, "logits_per_char": -0.7637655735015869, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 542, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1776288747787476, "incorrect_loss_raw": 1.5291049877802532, "correct_loss_per_char": 0.5888144373893738, "incorrect_loss_per_char": 0.7645524938901266, "correct_loss_per_token": 1.1776288747787476, "incorrect_loss_per_token": 1.5291049877802532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1356470584869385, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.1356470584869385, "logits_per_char": -0.5678235292434692, "num_chars": 2}, {"sum_logits": -1.1776288747787476, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.1776288747787476, "logits_per_char": -0.5888144373893738, "num_chars": 2}, {"sum_logits": -1.8876783847808838, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.8876783847808838, "logits_per_char": -0.9438391923904419, "num_chars": 2}, {"sum_logits": -1.563989520072937, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.563989520072937, "logits_per_char": -0.7819947600364685, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 543, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2148383855819702, "incorrect_loss_raw": 1.5629919171333313, "correct_loss_per_char": 0.6074191927909851, "incorrect_loss_per_char": 0.7814959585666656, "correct_loss_per_token": 1.2148383855819702, "incorrect_loss_per_token": 1.5629919171333313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9656904339790344, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -0.9656904339790344, "logits_per_char": -0.4828452169895172, "num_chars": 2}, {"sum_logits": -1.2148383855819702, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2148383855819702, "logits_per_char": -0.6074191927909851, "num_chars": 2}, {"sum_logits": -1.9134482145309448, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.9134482145309448, "logits_per_char": -0.9567241072654724, "num_chars": 2}, {"sum_logits": -1.8098371028900146, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.8098371028900146, "logits_per_char": -0.9049185514450073, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 544, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9305940866470337, "incorrect_loss_raw": 1.694951097170512, "correct_loss_per_char": 0.46529704332351685, "incorrect_loss_per_char": 0.847475548585256, "correct_loss_per_token": 0.9305940866470337, "incorrect_loss_per_token": 1.694951097170512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9305940866470337, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -0.9305940866470337, "logits_per_char": -0.46529704332351685, "num_chars": 2}, {"sum_logits": -1.1865490674972534, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.1865490674972534, "logits_per_char": -0.5932745337486267, "num_chars": 2}, {"sum_logits": -2.1704299449920654, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -2.1704299449920654, "logits_per_char": -1.0852149724960327, "num_chars": 2}, {"sum_logits": -1.7278742790222168, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.7278742790222168, "logits_per_char": -0.8639371395111084, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 545, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0106468200683594, "incorrect_loss_raw": 1.269527554512024, "correct_loss_per_char": 1.0053234100341797, "incorrect_loss_per_char": 0.634763777256012, "correct_loss_per_token": 2.0106468200683594, "incorrect_loss_per_token": 1.269527554512024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1062934398651123, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.1062934398651123, "logits_per_char": -0.5531467199325562, "num_chars": 2}, {"sum_logits": -1.1584357023239136, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.1584357023239136, "logits_per_char": -0.5792178511619568, "num_chars": 2}, {"sum_logits": -2.0106468200683594, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -2.0106468200683594, "logits_per_char": -1.0053234100341797, "num_chars": 2}, {"sum_logits": -1.543853521347046, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.543853521347046, "logits_per_char": -0.771926760673523, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 546, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.676427960395813, "incorrect_loss_raw": 1.3168404499689739, "correct_loss_per_char": 0.8382139801979065, "incorrect_loss_per_char": 0.6584202249844869, "correct_loss_per_token": 1.676427960395813, "incorrect_loss_per_token": 1.3168404499689739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2245627641677856, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.2245627641677856, "logits_per_char": -0.6122813820838928, "num_chars": 2}, {"sum_logits": -1.3619325160980225, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3619325160980225, "logits_per_char": -0.6809662580490112, "num_chars": 2}, {"sum_logits": -1.676427960395813, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.676427960395813, "logits_per_char": -0.8382139801979065, "num_chars": 2}, {"sum_logits": -1.3640260696411133, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3640260696411133, "logits_per_char": -0.6820130348205566, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 547, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6763343811035156, "incorrect_loss_raw": 1.3883729775746663, "correct_loss_per_char": 0.8381671905517578, "incorrect_loss_per_char": 0.6941864887873331, "correct_loss_per_token": 1.6763343811035156, "incorrect_loss_per_token": 1.3883729775746663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0264010429382324, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0264010429382324, "logits_per_char": -0.5132005214691162, "num_chars": 2}, {"sum_logits": -1.1864728927612305, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.1864728927612305, "logits_per_char": -0.5932364463806152, "num_chars": 2}, {"sum_logits": -1.9522449970245361, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.9522449970245361, "logits_per_char": -0.9761224985122681, "num_chars": 2}, {"sum_logits": -1.6763343811035156, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.6763343811035156, "logits_per_char": -0.8381671905517578, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 548, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4646166563034058, "incorrect_loss_raw": 1.4016335407892864, "correct_loss_per_char": 0.7323083281517029, "incorrect_loss_per_char": 0.7008167703946432, "correct_loss_per_token": 1.4646166563034058, "incorrect_loss_per_token": 1.4016335407892864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0929712057113647, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.0929712057113647, "logits_per_char": -0.5464856028556824, "num_chars": 2}, {"sum_logits": -1.4646166563034058, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.4646166563034058, "logits_per_char": -0.7323083281517029, "num_chars": 2}, {"sum_logits": -1.6851356029510498, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6851356029510498, "logits_per_char": -0.8425678014755249, "num_chars": 2}, {"sum_logits": -1.4267938137054443, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.4267938137054443, "logits_per_char": -0.7133969068527222, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 549, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.177562952041626, "incorrect_loss_raw": 1.5303594668706257, "correct_loss_per_char": 0.588781476020813, "incorrect_loss_per_char": 0.7651797334353129, "correct_loss_per_token": 1.177562952041626, "incorrect_loss_per_token": 1.5303594668706257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0940169095993042, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0940169095993042, "logits_per_char": -0.5470084547996521, "num_chars": 2}, {"sum_logits": -1.177562952041626, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.177562952041626, "logits_per_char": -0.588781476020813, "num_chars": 2}, {"sum_logits": -1.8245115280151367, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8245115280151367, "logits_per_char": -0.9122557640075684, "num_chars": 2}, {"sum_logits": -1.6725499629974365, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.6725499629974365, "logits_per_char": -0.8362749814987183, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 550, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1044878959655762, "incorrect_loss_raw": 1.5181911786397297, "correct_loss_per_char": 0.5522439479827881, "incorrect_loss_per_char": 0.7590955893198649, "correct_loss_per_token": 1.1044878959655762, "incorrect_loss_per_token": 1.5181911786397297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1044878959655762, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -1.1044878959655762, "logits_per_char": -0.5522439479827881, "num_chars": 2}, {"sum_logits": -1.4574323892593384, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.4574323892593384, "logits_per_char": -0.7287161946296692, "num_chars": 2}, {"sum_logits": -1.668680191040039, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.668680191040039, "logits_per_char": -0.8343400955200195, "num_chars": 2}, {"sum_logits": -1.428460955619812, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.428460955619812, "logits_per_char": -0.714230477809906, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 551, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1883848905563354, "incorrect_loss_raw": 1.536562403043111, "correct_loss_per_char": 0.5941924452781677, "incorrect_loss_per_char": 0.7682812015215555, "correct_loss_per_token": 1.1883848905563354, "incorrect_loss_per_token": 1.536562403043111, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0726357698440552, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.0726357698440552, "logits_per_char": -0.5363178849220276, "num_chars": 2}, {"sum_logits": -1.1883848905563354, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.1883848905563354, "logits_per_char": -0.5941924452781677, "num_chars": 2}, {"sum_logits": -1.9020737409591675, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.9020737409591675, "logits_per_char": -0.9510368704795837, "num_chars": 2}, {"sum_logits": -1.6349776983261108, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.6349776983261108, "logits_per_char": -0.8174888491630554, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 552, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2333146333694458, "incorrect_loss_raw": 1.4976053635279338, "correct_loss_per_char": 0.6166573166847229, "incorrect_loss_per_char": 0.7488026817639669, "correct_loss_per_token": 1.2333146333694458, "incorrect_loss_per_token": 1.4976053635279338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2333146333694458, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.2333146333694458, "logits_per_char": -0.6166573166847229, "num_chars": 2}, {"sum_logits": -1.130194067955017, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.130194067955017, "logits_per_char": -0.5650970339775085, "num_chars": 2}, {"sum_logits": -1.8632676601409912, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.8632676601409912, "logits_per_char": -0.9316338300704956, "num_chars": 2}, {"sum_logits": -1.499354362487793, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.499354362487793, "logits_per_char": -0.7496771812438965, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 553, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1392085552215576, "incorrect_loss_raw": 1.5752145846684773, "correct_loss_per_char": 0.5696042776107788, "incorrect_loss_per_char": 0.7876072923342387, "correct_loss_per_token": 1.1392085552215576, "incorrect_loss_per_token": 1.5752145846684773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0466340780258179, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0466340780258179, "logits_per_char": -0.5233170390129089, "num_chars": 2}, {"sum_logits": -1.1392085552215576, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.1392085552215576, "logits_per_char": -0.5696042776107788, "num_chars": 2}, {"sum_logits": -1.8710137605667114, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.8710137605667114, "logits_per_char": -0.9355068802833557, "num_chars": 2}, {"sum_logits": -1.8079959154129028, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.8079959154129028, "logits_per_char": -0.9039979577064514, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 554, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0683505535125732, "incorrect_loss_raw": 1.2940714359283447, "correct_loss_per_char": 1.0341752767562866, "incorrect_loss_per_char": 0.6470357179641724, "correct_loss_per_token": 2.0683505535125732, "incorrect_loss_per_token": 1.2940714359283447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9602706432342529, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -0.9602706432342529, "logits_per_char": -0.48013532161712646, "num_chars": 2}, {"sum_logits": -1.1943742036819458, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.1943742036819458, "logits_per_char": -0.5971871018409729, "num_chars": 2}, {"sum_logits": -2.0683505535125732, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -2.0683505535125732, "logits_per_char": -1.0341752767562866, "num_chars": 2}, {"sum_logits": -1.7275694608688354, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.7275694608688354, "logits_per_char": -0.8637847304344177, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 555, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3039331436157227, "incorrect_loss_raw": 1.470102866490682, "correct_loss_per_char": 0.6519665718078613, "incorrect_loss_per_char": 0.735051433245341, "correct_loss_per_token": 1.3039331436157227, "incorrect_loss_per_token": 1.470102866490682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1321702003479004, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.1321702003479004, "logits_per_char": -0.5660851001739502, "num_chars": 2}, {"sum_logits": -1.3039331436157227, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.3039331436157227, "logits_per_char": -0.6519665718078613, "num_chars": 2}, {"sum_logits": -1.8384162187576294, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.8384162187576294, "logits_per_char": -0.9192081093788147, "num_chars": 2}, {"sum_logits": -1.4397221803665161, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.4397221803665161, "logits_per_char": -0.7198610901832581, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 556, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.139275074005127, "incorrect_loss_raw": 1.507744590441386, "correct_loss_per_char": 0.5696375370025635, "incorrect_loss_per_char": 0.753872295220693, "correct_loss_per_token": 1.139275074005127, "incorrect_loss_per_token": 1.507744590441386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.139275074005127, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.139275074005127, "logits_per_char": -0.5696375370025635, "num_chars": 2}, {"sum_logits": -1.3280161619186401, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3280161619186401, "logits_per_char": -0.6640080809593201, "num_chars": 2}, {"sum_logits": -1.703776240348816, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.703776240348816, "logits_per_char": -0.851888120174408, "num_chars": 2}, {"sum_logits": -1.4914413690567017, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.4914413690567017, "logits_per_char": -0.7457206845283508, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 557, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.144629716873169, "incorrect_loss_raw": 1.5155361493428547, "correct_loss_per_char": 0.5723148584365845, "incorrect_loss_per_char": 0.7577680746714274, "correct_loss_per_token": 1.144629716873169, "incorrect_loss_per_token": 1.5155361493428547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.144629716873169, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -1.144629716873169, "logits_per_char": -0.5723148584365845, "num_chars": 2}, {"sum_logits": -1.232315182685852, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.232315182685852, "logits_per_char": -0.616157591342926, "num_chars": 2}, {"sum_logits": -1.7635692358016968, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.7635692358016968, "logits_per_char": -0.8817846179008484, "num_chars": 2}, {"sum_logits": -1.5507240295410156, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.5507240295410156, "logits_per_char": -0.7753620147705078, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 558, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.714309573173523, "incorrect_loss_raw": 1.3886009057362874, "correct_loss_per_char": 0.8571547865867615, "incorrect_loss_per_char": 0.6943004528681437, "correct_loss_per_token": 1.714309573173523, "incorrect_loss_per_token": 1.3886009057362874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9993433952331543, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.9993433952331543, "logits_per_char": -0.49967169761657715, "num_chars": 2}, {"sum_logits": -1.1857197284698486, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1857197284698486, "logits_per_char": -0.5928598642349243, "num_chars": 2}, {"sum_logits": -1.9807395935058594, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.9807395935058594, "logits_per_char": -0.9903697967529297, "num_chars": 2}, {"sum_logits": -1.714309573173523, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.714309573173523, "logits_per_char": -0.8571547865867615, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 559, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1904288530349731, "incorrect_loss_raw": 1.5318700472513835, "correct_loss_per_char": 0.5952144265174866, "incorrect_loss_per_char": 0.7659350236256918, "correct_loss_per_token": 1.1904288530349731, "incorrect_loss_per_token": 1.5318700472513835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1159093379974365, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.1159093379974365, "logits_per_char": -0.5579546689987183, "num_chars": 2}, {"sum_logits": -1.1904288530349731, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.1904288530349731, "logits_per_char": -0.5952144265174866, "num_chars": 2}, {"sum_logits": -1.981951355934143, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.981951355934143, "logits_per_char": -0.9909756779670715, "num_chars": 2}, {"sum_logits": -1.4977494478225708, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.4977494478225708, "logits_per_char": -0.7488747239112854, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 560, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4734961986541748, "incorrect_loss_raw": 1.3933026790618896, "correct_loss_per_char": 0.7367480993270874, "incorrect_loss_per_char": 0.6966513395309448, "correct_loss_per_token": 1.4734961986541748, "incorrect_loss_per_token": 1.3933026790618896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0915175676345825, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.0915175676345825, "logits_per_char": -0.5457587838172913, "num_chars": 2}, {"sum_logits": -1.4734961986541748, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4734961986541748, "logits_per_char": -0.7367480993270874, "num_chars": 2}, {"sum_logits": -1.6414713859558105, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.6414713859558105, "logits_per_char": -0.8207356929779053, "num_chars": 2}, {"sum_logits": -1.4469190835952759, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4469190835952759, "logits_per_char": -0.7234595417976379, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 561, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.119983434677124, "incorrect_loss_raw": 1.582795222600301, "correct_loss_per_char": 0.559991717338562, "incorrect_loss_per_char": 0.7913976113001505, "correct_loss_per_token": 1.119983434677124, "incorrect_loss_per_token": 1.582795222600301, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.065067172050476, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -1.065067172050476, "logits_per_char": -0.532533586025238, "num_chars": 2}, {"sum_logits": -1.119983434677124, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.119983434677124, "logits_per_char": -0.559991717338562, "num_chars": 2}, {"sum_logits": -1.9730312824249268, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.9730312824249268, "logits_per_char": -0.9865156412124634, "num_chars": 2}, {"sum_logits": -1.7102872133255005, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.7102872133255005, "logits_per_char": -0.8551436066627502, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 562, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6359919309616089, "incorrect_loss_raw": 1.4029373327891033, "correct_loss_per_char": 0.8179959654808044, "incorrect_loss_per_char": 0.7014686663945516, "correct_loss_per_token": 1.6359919309616089, "incorrect_loss_per_token": 1.4029373327891033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1021639108657837, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": true, "logits_per_token": -1.1021639108657837, "logits_per_char": -0.5510819554328918, "num_chars": 2}, {"sum_logits": -1.1320140361785889, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.1320140361785889, "logits_per_char": -0.5660070180892944, "num_chars": 2}, {"sum_logits": -1.974634051322937, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.974634051322937, "logits_per_char": -0.9873170256614685, "num_chars": 2}, {"sum_logits": -1.6359919309616089, "num_tokens": 1, "num_tokens_all": 1103, "is_greedy": false, "logits_per_token": -1.6359919309616089, "logits_per_char": -0.8179959654808044, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 563, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.315053939819336, "incorrect_loss_raw": 1.455354889233907, "correct_loss_per_char": 0.657526969909668, "incorrect_loss_per_char": 0.7276774446169535, "correct_loss_per_token": 1.315053939819336, "incorrect_loss_per_token": 1.455354889233907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2159833908081055, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -1.2159833908081055, "logits_per_char": -0.6079916954040527, "num_chars": 2}, {"sum_logits": -1.315053939819336, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.315053939819336, "logits_per_char": -0.657526969909668, "num_chars": 2}, {"sum_logits": -1.8121448755264282, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.8121448755264282, "logits_per_char": -0.9060724377632141, "num_chars": 2}, {"sum_logits": -1.3379364013671875, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.3379364013671875, "logits_per_char": -0.6689682006835938, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 564, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9510169625282288, "incorrect_loss_raw": 1.6936673323313396, "correct_loss_per_char": 0.4755084812641144, "incorrect_loss_per_char": 0.8468336661656698, "correct_loss_per_token": 0.9510169625282288, "incorrect_loss_per_token": 1.6936673323313396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9510169625282288, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": true, "logits_per_token": -0.9510169625282288, "logits_per_char": -0.4755084812641144, "num_chars": 2}, {"sum_logits": -1.1297557353973389, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -1.1297557353973389, "logits_per_char": -0.5648778676986694, "num_chars": 2}, {"sum_logits": -2.168396472930908, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -2.168396472930908, "logits_per_char": -1.084198236465454, "num_chars": 2}, {"sum_logits": -1.7828497886657715, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -1.7828497886657715, "logits_per_char": -0.8914248943328857, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 565, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3441106081008911, "incorrect_loss_raw": 1.549472490946452, "correct_loss_per_char": 0.6720553040504456, "incorrect_loss_per_char": 0.774736245473226, "correct_loss_per_token": 1.3441106081008911, "incorrect_loss_per_token": 1.549472490946452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8509176969528198, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.8509176969528198, "logits_per_char": -0.4254588484764099, "num_chars": 2}, {"sum_logits": -1.3441106081008911, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.3441106081008911, "logits_per_char": -0.6720553040504456, "num_chars": 2}, {"sum_logits": -1.9305148124694824, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.9305148124694824, "logits_per_char": -0.9652574062347412, "num_chars": 2}, {"sum_logits": -1.8669849634170532, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8669849634170532, "logits_per_char": -0.9334924817085266, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 566, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1388005018234253, "incorrect_loss_raw": 1.5120385885238647, "correct_loss_per_char": 0.5694002509117126, "incorrect_loss_per_char": 0.7560192942619324, "correct_loss_per_token": 1.1388005018234253, "incorrect_loss_per_token": 1.5120385885238647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1388005018234253, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -1.1388005018234253, "logits_per_char": -0.5694002509117126, "num_chars": 2}, {"sum_logits": -1.2894195318222046, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.2894195318222046, "logits_per_char": -0.6447097659111023, "num_chars": 2}, {"sum_logits": -1.7416386604309082, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.7416386604309082, "logits_per_char": -0.8708193302154541, "num_chars": 2}, {"sum_logits": -1.5050575733184814, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.5050575733184814, "logits_per_char": -0.7525287866592407, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 567, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5267921686172485, "incorrect_loss_raw": 1.3723021348317463, "correct_loss_per_char": 0.7633960843086243, "incorrect_loss_per_char": 0.6861510674158732, "correct_loss_per_token": 1.5267921686172485, "incorrect_loss_per_token": 1.3723021348317463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2039519548416138, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -1.2039519548416138, "logits_per_char": -0.6019759774208069, "num_chars": 2}, {"sum_logits": -1.2681740522384644, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.2681740522384644, "logits_per_char": -0.6340870261192322, "num_chars": 2}, {"sum_logits": -1.6447803974151611, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.6447803974151611, "logits_per_char": -0.8223901987075806, "num_chars": 2}, {"sum_logits": -1.5267921686172485, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.5267921686172485, "logits_per_char": -0.7633960843086243, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 568, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7325465679168701, "incorrect_loss_raw": 1.3813114563624065, "correct_loss_per_char": 0.8662732839584351, "incorrect_loss_per_char": 0.6906557281812032, "correct_loss_per_token": 1.7325465679168701, "incorrect_loss_per_token": 1.3813114563624065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.039178729057312, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -1.039178729057312, "logits_per_char": -0.519589364528656, "num_chars": 2}, {"sum_logits": -1.140997052192688, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.140997052192688, "logits_per_char": -0.570498526096344, "num_chars": 2}, {"sum_logits": -1.9637585878372192, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.9637585878372192, "logits_per_char": -0.9818792939186096, "num_chars": 2}, {"sum_logits": -1.7325465679168701, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.7325465679168701, "logits_per_char": -0.8662732839584351, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 569, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9050872325897217, "incorrect_loss_raw": 1.420833686987559, "correct_loss_per_char": 0.9525436162948608, "incorrect_loss_per_char": 0.7104168434937795, "correct_loss_per_token": 1.9050872325897217, "incorrect_loss_per_token": 1.420833686987559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8435470461845398, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": true, "logits_per_token": -0.8435470461845398, "logits_per_char": -0.4217735230922699, "num_chars": 2}, {"sum_logits": -1.2030442953109741, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.2030442953109741, "logits_per_char": -0.6015221476554871, "num_chars": 2}, {"sum_logits": -2.215909719467163, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -2.215909719467163, "logits_per_char": -1.1079548597335815, "num_chars": 2}, {"sum_logits": -1.9050872325897217, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.9050872325897217, "logits_per_char": -0.9525436162948608, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 570, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1091734170913696, "incorrect_loss_raw": 1.5488890409469604, "correct_loss_per_char": 0.5545867085456848, "incorrect_loss_per_char": 0.7744445204734802, "correct_loss_per_token": 1.1091734170913696, "incorrect_loss_per_token": 1.5488890409469604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1091734170913696, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -1.1091734170913696, "logits_per_char": -0.5545867085456848, "num_chars": 2}, {"sum_logits": -1.1783013343811035, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.1783013343811035, "logits_per_char": -0.5891506671905518, "num_chars": 2}, {"sum_logits": -1.8263018131256104, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.8263018131256104, "logits_per_char": -0.9131509065628052, "num_chars": 2}, {"sum_logits": -1.6420639753341675, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.6420639753341675, "logits_per_char": -0.8210319876670837, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 571, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.270801305770874, "incorrect_loss_raw": 1.4558406273523967, "correct_loss_per_char": 0.635400652885437, "incorrect_loss_per_char": 0.7279203136761984, "correct_loss_per_token": 1.270801305770874, "incorrect_loss_per_token": 1.4558406273523967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.270801305770874, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.270801305770874, "logits_per_char": -0.635400652885437, "num_chars": 2}, {"sum_logits": -1.2137112617492676, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -1.2137112617492676, "logits_per_char": -0.6068556308746338, "num_chars": 2}, {"sum_logits": -1.6709740161895752, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6709740161895752, "logits_per_char": -0.8354870080947876, "num_chars": 2}, {"sum_logits": -1.4828366041183472, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.4828366041183472, "logits_per_char": -0.7414183020591736, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 572, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6483848094940186, "incorrect_loss_raw": 1.417317509651184, "correct_loss_per_char": 0.8241924047470093, "incorrect_loss_per_char": 0.708658754825592, "correct_loss_per_token": 1.6483848094940186, "incorrect_loss_per_token": 1.417317509651184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9643381834030151, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": true, "logits_per_token": -0.9643381834030151, "logits_per_char": -0.48216909170150757, "num_chars": 2}, {"sum_logits": -1.2559850215911865, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -1.2559850215911865, "logits_per_char": -0.6279925107955933, "num_chars": 2}, {"sum_logits": -2.0316293239593506, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -2.0316293239593506, "logits_per_char": -1.0158146619796753, "num_chars": 2}, {"sum_logits": -1.6483848094940186, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -1.6483848094940186, "logits_per_char": -0.8241924047470093, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 573, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9865826368331909, "incorrect_loss_raw": 1.6224254767100017, "correct_loss_per_char": 0.49329131841659546, "incorrect_loss_per_char": 0.8112127383550009, "correct_loss_per_token": 0.9865826368331909, "incorrect_loss_per_token": 1.6224254767100017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9865826368331909, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.9865826368331909, "logits_per_char": -0.49329131841659546, "num_chars": 2}, {"sum_logits": -1.255814790725708, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.255814790725708, "logits_per_char": -0.627907395362854, "num_chars": 2}, {"sum_logits": -1.953291893005371, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.953291893005371, "logits_per_char": -0.9766459465026855, "num_chars": 2}, {"sum_logits": -1.6581697463989258, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.6581697463989258, "logits_per_char": -0.8290848731994629, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 574, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1819751262664795, "incorrect_loss_raw": 1.5311458905537922, "correct_loss_per_char": 0.5909875631332397, "incorrect_loss_per_char": 0.7655729452768961, "correct_loss_per_token": 1.1819751262664795, "incorrect_loss_per_token": 1.5311458905537922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1819751262664795, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.1819751262664795, "logits_per_char": -0.5909875631332397, "num_chars": 2}, {"sum_logits": -1.1120136976242065, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.1120136976242065, "logits_per_char": -0.5560068488121033, "num_chars": 2}, {"sum_logits": -1.9162427186965942, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.9162427186965942, "logits_per_char": -0.9581213593482971, "num_chars": 2}, {"sum_logits": -1.5651812553405762, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.5651812553405762, "logits_per_char": -0.7825906276702881, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 575, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9999768733978271, "incorrect_loss_raw": 1.3265447417895, "correct_loss_per_char": 0.9999884366989136, "incorrect_loss_per_char": 0.66327237089475, "correct_loss_per_token": 1.9999768733978271, "incorrect_loss_per_token": 1.3265447417895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9225553870201111, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -0.9225553870201111, "logits_per_char": -0.46127769351005554, "num_chars": 2}, {"sum_logits": -1.2159204483032227, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.2159204483032227, "logits_per_char": -0.6079602241516113, "num_chars": 2}, {"sum_logits": -1.9999768733978271, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.9999768733978271, "logits_per_char": -0.9999884366989136, "num_chars": 2}, {"sum_logits": -1.841158390045166, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.841158390045166, "logits_per_char": -0.920579195022583, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 576, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.610367774963379, "incorrect_loss_raw": 1.3932815392812092, "correct_loss_per_char": 0.8051838874816895, "incorrect_loss_per_char": 0.6966407696406046, "correct_loss_per_token": 1.610367774963379, "incorrect_loss_per_token": 1.3932815392812092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0605729818344116, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0605729818344116, "logits_per_char": -0.5302864909172058, "num_chars": 2}, {"sum_logits": -1.230586051940918, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.230586051940918, "logits_per_char": -0.615293025970459, "num_chars": 2}, {"sum_logits": -1.8886855840682983, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.8886855840682983, "logits_per_char": -0.9443427920341492, "num_chars": 2}, {"sum_logits": -1.610367774963379, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.610367774963379, "logits_per_char": -0.8051838874816895, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 577, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1140989065170288, "incorrect_loss_raw": 1.5796593030293782, "correct_loss_per_char": 0.5570494532585144, "incorrect_loss_per_char": 0.7898296515146891, "correct_loss_per_token": 1.1140989065170288, "incorrect_loss_per_token": 1.5796593030293782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1256053447723389, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.1256053447723389, "logits_per_char": -0.5628026723861694, "num_chars": 2}, {"sum_logits": -1.1140989065170288, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -1.1140989065170288, "logits_per_char": -0.5570494532585144, "num_chars": 2}, {"sum_logits": -2.0503311157226562, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -2.0503311157226562, "logits_per_char": -1.0251655578613281, "num_chars": 2}, {"sum_logits": -1.5630414485931396, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.5630414485931396, "logits_per_char": -0.7815207242965698, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 578, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0737061500549316, "incorrect_loss_raw": 1.2645288705825806, "correct_loss_per_char": 1.0368530750274658, "incorrect_loss_per_char": 0.6322644352912903, "correct_loss_per_token": 2.0737061500549316, "incorrect_loss_per_token": 1.2645288705825806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0947976112365723, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.0947976112365723, "logits_per_char": -0.5473988056182861, "num_chars": 2}, {"sum_logits": -1.1429202556610107, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1429202556610107, "logits_per_char": -0.5714601278305054, "num_chars": 2}, {"sum_logits": -2.0737061500549316, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -2.0737061500549316, "logits_per_char": -1.0368530750274658, "num_chars": 2}, {"sum_logits": -1.5558687448501587, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.5558687448501587, "logits_per_char": -0.7779343724250793, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 579, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.101442813873291, "incorrect_loss_raw": 1.300356149673462, "correct_loss_per_char": 1.0507214069366455, "incorrect_loss_per_char": 0.650178074836731, "correct_loss_per_token": 2.101442813873291, "incorrect_loss_per_token": 1.300356149673462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0097882747650146, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -1.0097882747650146, "logits_per_char": -0.5048941373825073, "num_chars": 2}, {"sum_logits": -1.0687167644500732, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.0687167644500732, "logits_per_char": -0.5343583822250366, "num_chars": 2}, {"sum_logits": -2.101442813873291, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -2.101442813873291, "logits_per_char": -1.0507214069366455, "num_chars": 2}, {"sum_logits": -1.8225634098052979, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.8225634098052979, "logits_per_char": -0.9112817049026489, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 580, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9085273742675781, "incorrect_loss_raw": 1.2861930926640828, "correct_loss_per_char": 0.9542636871337891, "incorrect_loss_per_char": 0.6430965463320414, "correct_loss_per_token": 1.9085273742675781, "incorrect_loss_per_token": 1.2861930926640828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0936574935913086, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0936574935913086, "logits_per_char": -0.5468287467956543, "num_chars": 2}, {"sum_logits": -1.2096917629241943, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.2096917629241943, "logits_per_char": -0.6048458814620972, "num_chars": 2}, {"sum_logits": -1.9085273742675781, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.9085273742675781, "logits_per_char": -0.9542636871337891, "num_chars": 2}, {"sum_logits": -1.5552300214767456, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.5552300214767456, "logits_per_char": -0.7776150107383728, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 581, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8939228057861328, "incorrect_loss_raw": 1.7335253556569417, "correct_loss_per_char": 0.4469614028930664, "incorrect_loss_per_char": 0.8667626778284708, "correct_loss_per_token": 0.8939228057861328, "incorrect_loss_per_token": 1.7335253556569417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8939228057861328, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.8939228057861328, "logits_per_char": -0.4469614028930664, "num_chars": 2}, {"sum_logits": -1.1588594913482666, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1588594913482666, "logits_per_char": -0.5794297456741333, "num_chars": 2}, {"sum_logits": -2.1680970191955566, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -2.1680970191955566, "logits_per_char": -1.0840485095977783, "num_chars": 2}, {"sum_logits": -1.873619556427002, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.873619556427002, "logits_per_char": -0.936809778213501, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 582, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7185649871826172, "incorrect_loss_raw": 1.3644636074701946, "correct_loss_per_char": 0.8592824935913086, "incorrect_loss_per_char": 0.6822318037350973, "correct_loss_per_token": 1.7185649871826172, "incorrect_loss_per_token": 1.3644636074701946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.064778208732605, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -1.064778208732605, "logits_per_char": -0.5323891043663025, "num_chars": 2}, {"sum_logits": -1.1666150093078613, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.1666150093078613, "logits_per_char": -0.5833075046539307, "num_chars": 2}, {"sum_logits": -1.8619976043701172, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.8619976043701172, "logits_per_char": -0.9309988021850586, "num_chars": 2}, {"sum_logits": -1.7185649871826172, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.7185649871826172, "logits_per_char": -0.8592824935913086, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 583, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9981403350830078, "incorrect_loss_raw": 1.3278472423553467, "correct_loss_per_char": 0.9990701675415039, "incorrect_loss_per_char": 0.6639236211776733, "correct_loss_per_token": 1.9981403350830078, "incorrect_loss_per_token": 1.3278472423553467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.941114068031311, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -0.941114068031311, "logits_per_char": -0.4705570340156555, "num_chars": 2}, {"sum_logits": -1.1704227924346924, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.1704227924346924, "logits_per_char": -0.5852113962173462, "num_chars": 2}, {"sum_logits": -1.9981403350830078, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.9981403350830078, "logits_per_char": -0.9990701675415039, "num_chars": 2}, {"sum_logits": -1.8720048666000366, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.8720048666000366, "logits_per_char": -0.9360024333000183, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 584, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.288053035736084, "incorrect_loss_raw": 1.4714375336964924, "correct_loss_per_char": 0.644026517868042, "incorrect_loss_per_char": 0.7357187668482462, "correct_loss_per_token": 1.288053035736084, "incorrect_loss_per_token": 1.4714375336964924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1292088031768799, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.1292088031768799, "logits_per_char": -0.5646044015884399, "num_chars": 2}, {"sum_logits": -1.288053035736084, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.288053035736084, "logits_per_char": -0.644026517868042, "num_chars": 2}, {"sum_logits": -1.7978296279907227, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.7978296279907227, "logits_per_char": -0.8989148139953613, "num_chars": 2}, {"sum_logits": -1.487274169921875, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.487274169921875, "logits_per_char": -0.7436370849609375, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 585, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5869643688201904, "incorrect_loss_raw": 1.4001446167627971, "correct_loss_per_char": 0.7934821844100952, "incorrect_loss_per_char": 0.7000723083813986, "correct_loss_per_token": 1.5869643688201904, "incorrect_loss_per_token": 1.4001446167627971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1074485778808594, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -1.1074485778808594, "logits_per_char": -0.5537242889404297, "num_chars": 2}, {"sum_logits": -1.1635745763778687, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.1635745763778687, "logits_per_char": -0.5817872881889343, "num_chars": 2}, {"sum_logits": -1.929410696029663, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.929410696029663, "logits_per_char": -0.9647053480148315, "num_chars": 2}, {"sum_logits": -1.5869643688201904, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.5869643688201904, "logits_per_char": -0.7934821844100952, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 586, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.270068883895874, "incorrect_loss_raw": 1.495568831761678, "correct_loss_per_char": 0.635034441947937, "incorrect_loss_per_char": 0.747784415880839, "correct_loss_per_token": 1.270068883895874, "incorrect_loss_per_token": 1.495568831761678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0533778667449951, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": true, "logits_per_token": -1.0533778667449951, "logits_per_char": -0.5266889333724976, "num_chars": 2}, {"sum_logits": -1.270068883895874, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.270068883895874, "logits_per_char": -0.635034441947937, "num_chars": 2}, {"sum_logits": -1.760498046875, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.760498046875, "logits_per_char": -0.8802490234375, "num_chars": 2}, {"sum_logits": -1.672830581665039, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.672830581665039, "logits_per_char": -0.8364152908325195, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 587, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0724387168884277, "incorrect_loss_raw": 1.5382762749989827, "correct_loss_per_char": 0.5362193584442139, "incorrect_loss_per_char": 0.7691381374994913, "correct_loss_per_token": 1.0724387168884277, "incorrect_loss_per_token": 1.5382762749989827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0724387168884277, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.0724387168884277, "logits_per_char": -0.5362193584442139, "num_chars": 2}, {"sum_logits": -1.4068262577056885, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4068262577056885, "logits_per_char": -0.7034131288528442, "num_chars": 2}, {"sum_logits": -1.737794280052185, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.737794280052185, "logits_per_char": -0.8688971400260925, "num_chars": 2}, {"sum_logits": -1.4702082872390747, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4702082872390747, "logits_per_char": -0.7351041436195374, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 588, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2523787021636963, "incorrect_loss_raw": 1.4743395646413167, "correct_loss_per_char": 0.6261893510818481, "incorrect_loss_per_char": 0.7371697823206583, "correct_loss_per_token": 1.2523787021636963, "incorrect_loss_per_token": 1.4743395646413167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246742844581604, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -1.246742844581604, "logits_per_char": -0.623371422290802, "num_chars": 2}, {"sum_logits": -1.2523787021636963, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.2523787021636963, "logits_per_char": -0.6261893510818481, "num_chars": 2}, {"sum_logits": -1.7815403938293457, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.7815403938293457, "logits_per_char": -0.8907701969146729, "num_chars": 2}, {"sum_logits": -1.3947354555130005, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.3947354555130005, "logits_per_char": -0.6973677277565002, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 589, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2243455648422241, "incorrect_loss_raw": 1.5670738617579143, "correct_loss_per_char": 0.6121727824211121, "incorrect_loss_per_char": 0.7835369308789571, "correct_loss_per_token": 1.2243455648422241, "incorrect_loss_per_token": 1.5670738617579143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.956419825553894, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -0.956419825553894, "logits_per_char": -0.478209912776947, "num_chars": 2}, {"sum_logits": -1.2243455648422241, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.2243455648422241, "logits_per_char": -0.6121727824211121, "num_chars": 2}, {"sum_logits": -2.010023355484009, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -2.010023355484009, "logits_per_char": -1.0050116777420044, "num_chars": 2}, {"sum_logits": -1.7347784042358398, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.7347784042358398, "logits_per_char": -0.8673892021179199, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 590, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9670796990394592, "incorrect_loss_raw": 1.6350456078847249, "correct_loss_per_char": 0.4835398495197296, "incorrect_loss_per_char": 0.8175228039423624, "correct_loss_per_token": 0.9670796990394592, "incorrect_loss_per_token": 1.6350456078847249, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9670796990394592, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -0.9670796990394592, "logits_per_char": -0.4835398495197296, "num_chars": 2}, {"sum_logits": -1.2550429105758667, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2550429105758667, "logits_per_char": -0.6275214552879333, "num_chars": 2}, {"sum_logits": -1.9511675834655762, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.9511675834655762, "logits_per_char": -0.9755837917327881, "num_chars": 2}, {"sum_logits": -1.698926329612732, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.698926329612732, "logits_per_char": -0.849463164806366, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 591, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2118839025497437, "incorrect_loss_raw": 1.4646687507629395, "correct_loss_per_char": 0.6059419512748718, "incorrect_loss_per_char": 0.7323343753814697, "correct_loss_per_token": 1.2118839025497437, "incorrect_loss_per_token": 1.4646687507629395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2118839025497437, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -1.2118839025497437, "logits_per_char": -0.6059419512748718, "num_chars": 2}, {"sum_logits": -1.3569682836532593, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.3569682836532593, "logits_per_char": -0.6784841418266296, "num_chars": 2}, {"sum_logits": -1.59229576587677, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.59229576587677, "logits_per_char": -0.796147882938385, "num_chars": 2}, {"sum_logits": -1.444742202758789, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.444742202758789, "logits_per_char": -0.7223711013793945, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 592, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9644393920898438, "incorrect_loss_raw": 1.2712844212849934, "correct_loss_per_char": 0.9822196960449219, "incorrect_loss_per_char": 0.6356422106424967, "correct_loss_per_token": 1.9644393920898438, "incorrect_loss_per_token": 1.2712844212849934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1327872276306152, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.1327872276306152, "logits_per_char": -0.5663936138153076, "num_chars": 2}, {"sum_logits": -1.219275951385498, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.219275951385498, "logits_per_char": -0.609637975692749, "num_chars": 2}, {"sum_logits": -1.9644393920898438, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.9644393920898438, "logits_per_char": -0.9822196960449219, "num_chars": 2}, {"sum_logits": -1.4617900848388672, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.4617900848388672, "logits_per_char": -0.7308950424194336, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 593, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2091495990753174, "incorrect_loss_raw": 1.5365228255589802, "correct_loss_per_char": 0.6045747995376587, "incorrect_loss_per_char": 0.7682614127794901, "correct_loss_per_token": 1.2091495990753174, "incorrect_loss_per_token": 1.5365228255589802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0325433015823364, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.0325433015823364, "logits_per_char": -0.5162716507911682, "num_chars": 2}, {"sum_logits": -1.2091495990753174, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.2091495990753174, "logits_per_char": -0.6045747995376587, "num_chars": 2}, {"sum_logits": -1.9009108543395996, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.9009108543395996, "logits_per_char": -0.9504554271697998, "num_chars": 2}, {"sum_logits": -1.6761143207550049, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.6761143207550049, "logits_per_char": -0.8380571603775024, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 594, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5393425226211548, "incorrect_loss_raw": 1.413257400194804, "correct_loss_per_char": 0.7696712613105774, "incorrect_loss_per_char": 0.706628700097402, "correct_loss_per_token": 1.5393425226211548, "incorrect_loss_per_token": 1.413257400194804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0798330307006836, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.0798330307006836, "logits_per_char": -0.5399165153503418, "num_chars": 2}, {"sum_logits": -1.2349390983581543, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.2349390983581543, "logits_per_char": -0.6174695491790771, "num_chars": 2}, {"sum_logits": -1.9250000715255737, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.9250000715255737, "logits_per_char": -0.9625000357627869, "num_chars": 2}, {"sum_logits": -1.5393425226211548, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.5393425226211548, "logits_per_char": -0.7696712613105774, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 595, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1824195384979248, "incorrect_loss_raw": 1.5122356017430623, "correct_loss_per_char": 0.5912097692489624, "incorrect_loss_per_char": 0.7561178008715311, "correct_loss_per_token": 1.1824195384979248, "incorrect_loss_per_token": 1.5122356017430623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1824195384979248, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.1824195384979248, "logits_per_char": -0.5912097692489624, "num_chars": 2}, {"sum_logits": -1.1593017578125, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.1593017578125, "logits_per_char": -0.57965087890625, "num_chars": 2}, {"sum_logits": -1.7991560697555542, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.7991560697555542, "logits_per_char": -0.8995780348777771, "num_chars": 2}, {"sum_logits": -1.5782489776611328, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.5782489776611328, "logits_per_char": -0.7891244888305664, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 596, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.550938367843628, "incorrect_loss_raw": 1.4611964027086894, "correct_loss_per_char": 0.775469183921814, "incorrect_loss_per_char": 0.7305982013543447, "correct_loss_per_token": 1.550938367843628, "incorrect_loss_per_token": 1.4611964027086894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9860405325889587, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": true, "logits_per_token": -0.9860405325889587, "logits_per_char": -0.49302026629447937, "num_chars": 2}, {"sum_logits": -1.2325966358184814, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.2325966358184814, "logits_per_char": -0.6162983179092407, "num_chars": 2}, {"sum_logits": -2.164952039718628, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -2.164952039718628, "logits_per_char": -1.082476019859314, "num_chars": 2}, {"sum_logits": -1.550938367843628, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.550938367843628, "logits_per_char": -0.775469183921814, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 597, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.322052001953125, "incorrect_loss_raw": 1.4521605173746746, "correct_loss_per_char": 0.6610260009765625, "incorrect_loss_per_char": 0.7260802586873373, "correct_loss_per_token": 1.322052001953125, "incorrect_loss_per_token": 1.4521605173746746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1572202444076538, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -1.1572202444076538, "logits_per_char": -0.5786101222038269, "num_chars": 2}, {"sum_logits": -1.3896764516830444, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.3896764516830444, "logits_per_char": -0.6948382258415222, "num_chars": 2}, {"sum_logits": -1.8095848560333252, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.8095848560333252, "logits_per_char": -0.9047924280166626, "num_chars": 2}, {"sum_logits": -1.322052001953125, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.322052001953125, "logits_per_char": -0.6610260009765625, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 598, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9982261657714844, "incorrect_loss_raw": 1.6060949961344402, "correct_loss_per_char": 0.4991130828857422, "incorrect_loss_per_char": 0.8030474980672201, "correct_loss_per_token": 0.9982261657714844, "incorrect_loss_per_token": 1.6060949961344402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9982261657714844, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -0.9982261657714844, "logits_per_char": -0.4991130828857422, "num_chars": 2}, {"sum_logits": -1.2816025018692017, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.2816025018692017, "logits_per_char": -0.6408012509346008, "num_chars": 2}, {"sum_logits": -1.8250911235809326, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.8250911235809326, "logits_per_char": -0.9125455617904663, "num_chars": 2}, {"sum_logits": -1.711591362953186, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.711591362953186, "logits_per_char": -0.855795681476593, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 599, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1559972763061523, "incorrect_loss_raw": 1.558202823003133, "correct_loss_per_char": 0.5779986381530762, "incorrect_loss_per_char": 0.7791014115015665, "correct_loss_per_token": 1.1559972763061523, "incorrect_loss_per_token": 1.558202823003133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0665161609649658, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.0665161609649658, "logits_per_char": -0.5332580804824829, "num_chars": 2}, {"sum_logits": -1.1559972763061523, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1559972763061523, "logits_per_char": -0.5779986381530762, "num_chars": 2}, {"sum_logits": -1.9171149730682373, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.9171149730682373, "logits_per_char": -0.9585574865341187, "num_chars": 2}, {"sum_logits": -1.6909773349761963, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.6909773349761963, "logits_per_char": -0.8454886674880981, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 600, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.430760383605957, "incorrect_loss_raw": 1.4142820835113525, "correct_loss_per_char": 0.7153801918029785, "incorrect_loss_per_char": 0.7071410417556763, "correct_loss_per_token": 1.430760383605957, "incorrect_loss_per_token": 1.4142820835113525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0966486930847168, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.0966486930847168, "logits_per_char": -0.5483243465423584, "num_chars": 2}, {"sum_logits": -1.4268884658813477, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4268884658813477, "logits_per_char": -0.7134442329406738, "num_chars": 2}, {"sum_logits": -1.7193090915679932, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.7193090915679932, "logits_per_char": -0.8596545457839966, "num_chars": 2}, {"sum_logits": -1.430760383605957, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.430760383605957, "logits_per_char": -0.7153801918029785, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 601, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.151734471321106, "incorrect_loss_raw": 1.530892252922058, "correct_loss_per_char": 0.575867235660553, "incorrect_loss_per_char": 0.765446126461029, "correct_loss_per_token": 1.151734471321106, "incorrect_loss_per_token": 1.530892252922058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.151734471321106, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.151734471321106, "logits_per_char": -0.575867235660553, "num_chars": 2}, {"sum_logits": -1.2120962142944336, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2120962142944336, "logits_per_char": -0.6060481071472168, "num_chars": 2}, {"sum_logits": -1.924349308013916, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.924349308013916, "logits_per_char": -0.962174654006958, "num_chars": 2}, {"sum_logits": -1.4562312364578247, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.4562312364578247, "logits_per_char": -0.7281156182289124, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 602, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6661220788955688, "incorrect_loss_raw": 1.332783857981364, "correct_loss_per_char": 0.8330610394477844, "incorrect_loss_per_char": 0.666391928990682, "correct_loss_per_token": 1.6661220788955688, "incorrect_loss_per_token": 1.332783857981364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1728501319885254, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -1.1728501319885254, "logits_per_char": -0.5864250659942627, "num_chars": 2}, {"sum_logits": -1.2525113821029663, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.2525113821029663, "logits_per_char": -0.6262556910514832, "num_chars": 2}, {"sum_logits": -1.6661220788955688, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.6661220788955688, "logits_per_char": -0.8330610394477844, "num_chars": 2}, {"sum_logits": -1.5729900598526, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.5729900598526, "logits_per_char": -0.7864950299263, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 603, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1605656147003174, "incorrect_loss_raw": 1.5292716821034749, "correct_loss_per_char": 0.5802828073501587, "incorrect_loss_per_char": 0.7646358410517374, "correct_loss_per_token": 1.1605656147003174, "incorrect_loss_per_token": 1.5292716821034749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1714175939559937, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.1714175939559937, "logits_per_char": -0.5857087969779968, "num_chars": 2}, {"sum_logits": -1.1605656147003174, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -1.1605656147003174, "logits_per_char": -0.5802828073501587, "num_chars": 2}, {"sum_logits": -1.8797093629837036, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.8797093629837036, "logits_per_char": -0.9398546814918518, "num_chars": 2}, {"sum_logits": -1.5366880893707275, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.5366880893707275, "logits_per_char": -0.7683440446853638, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 604, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0295144319534302, "incorrect_loss_raw": 1.6013861497243245, "correct_loss_per_char": 0.5147572159767151, "incorrect_loss_per_char": 0.8006930748621622, "correct_loss_per_token": 1.0295144319534302, "incorrect_loss_per_token": 1.6013861497243245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0295144319534302, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -1.0295144319534302, "logits_per_char": -0.5147572159767151, "num_chars": 2}, {"sum_logits": -1.2177144289016724, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.2177144289016724, "logits_per_char": -0.6088572144508362, "num_chars": 2}, {"sum_logits": -1.9923285245895386, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.9923285245895386, "logits_per_char": -0.9961642622947693, "num_chars": 2}, {"sum_logits": -1.5941154956817627, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.5941154956817627, "logits_per_char": -0.7970577478408813, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 605, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1119897365570068, "incorrect_loss_raw": 1.5220729112625122, "correct_loss_per_char": 0.5559948682785034, "incorrect_loss_per_char": 0.7610364556312561, "correct_loss_per_token": 1.1119897365570068, "incorrect_loss_per_token": 1.5220729112625122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1119897365570068, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.1119897365570068, "logits_per_char": -0.5559948682785034, "num_chars": 2}, {"sum_logits": -1.3234776258468628, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3234776258468628, "logits_per_char": -0.6617388129234314, "num_chars": 2}, {"sum_logits": -1.683079481124878, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.683079481124878, "logits_per_char": -0.841539740562439, "num_chars": 2}, {"sum_logits": -1.559661626815796, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.559661626815796, "logits_per_char": -0.779830813407898, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 606, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5652176141738892, "incorrect_loss_raw": 1.3996011813481648, "correct_loss_per_char": 0.7826088070869446, "incorrect_loss_per_char": 0.6998005906740824, "correct_loss_per_token": 1.5652176141738892, "incorrect_loss_per_token": 1.3996011813481648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1130595207214355, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.1130595207214355, "logits_per_char": -0.5565297603607178, "num_chars": 2}, {"sum_logits": -1.1909399032592773, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.1909399032592773, "logits_per_char": -0.5954699516296387, "num_chars": 2}, {"sum_logits": -1.8948041200637817, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.8948041200637817, "logits_per_char": -0.9474020600318909, "num_chars": 2}, {"sum_logits": -1.5652176141738892, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.5652176141738892, "logits_per_char": -0.7826088070869446, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 607, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4276043176651, "incorrect_loss_raw": 1.412227749824524, "correct_loss_per_char": 0.71380215883255, "incorrect_loss_per_char": 0.706113874912262, "correct_loss_per_token": 1.4276043176651, "incorrect_loss_per_token": 1.412227749824524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0950748920440674, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -1.0950748920440674, "logits_per_char": -0.5475374460220337, "num_chars": 2}, {"sum_logits": -1.4276043176651, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.4276043176651, "logits_per_char": -0.71380215883255, "num_chars": 2}, {"sum_logits": -1.6393382549285889, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.6393382549285889, "logits_per_char": -0.8196691274642944, "num_chars": 2}, {"sum_logits": -1.5022701025009155, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.5022701025009155, "logits_per_char": -0.7511350512504578, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 608, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.296059012413025, "incorrect_loss_raw": 1.4381858507792156, "correct_loss_per_char": 0.6480295062065125, "incorrect_loss_per_char": 0.7190929253896078, "correct_loss_per_token": 1.296059012413025, "incorrect_loss_per_token": 1.4381858507792156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.296059012413025, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.296059012413025, "logits_per_char": -0.6480295062065125, "num_chars": 2}, {"sum_logits": -1.3130602836608887, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.3130602836608887, "logits_per_char": -0.6565301418304443, "num_chars": 2}, {"sum_logits": -1.6426931619644165, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.6426931619644165, "logits_per_char": -0.8213465809822083, "num_chars": 2}, {"sum_logits": -1.3588041067123413, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.3588041067123413, "logits_per_char": -0.6794020533561707, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 609, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7852754592895508, "incorrect_loss_raw": 1.3219168186187744, "correct_loss_per_char": 0.8926377296447754, "incorrect_loss_per_char": 0.6609584093093872, "correct_loss_per_token": 1.7852754592895508, "incorrect_loss_per_token": 1.3219168186187744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0818792581558228, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -1.0818792581558228, "logits_per_char": -0.5409396290779114, "num_chars": 2}, {"sum_logits": -1.2452975511550903, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.2452975511550903, "logits_per_char": -0.6226487755775452, "num_chars": 2}, {"sum_logits": -1.7852754592895508, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.7852754592895508, "logits_per_char": -0.8926377296447754, "num_chars": 2}, {"sum_logits": -1.6385736465454102, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.6385736465454102, "logits_per_char": -0.8192868232727051, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 610, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2121251821517944, "incorrect_loss_raw": 1.5393307209014893, "correct_loss_per_char": 0.6060625910758972, "incorrect_loss_per_char": 0.7696653604507446, "correct_loss_per_token": 1.2121251821517944, "incorrect_loss_per_token": 1.5393307209014893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.020707607269287, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.020707607269287, "logits_per_char": -0.5103538036346436, "num_chars": 2}, {"sum_logits": -1.2121251821517944, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.2121251821517944, "logits_per_char": -0.6060625910758972, "num_chars": 2}, {"sum_logits": -1.9163155555725098, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9163155555725098, "logits_per_char": -0.9581577777862549, "num_chars": 2}, {"sum_logits": -1.680968999862671, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.680968999862671, "logits_per_char": -0.8404844999313354, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 611, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3835047483444214, "incorrect_loss_raw": 1.4108134110768635, "correct_loss_per_char": 0.6917523741722107, "incorrect_loss_per_char": 0.7054067055384318, "correct_loss_per_token": 1.3835047483444214, "incorrect_loss_per_token": 1.4108134110768635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1891095638275146, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.1891095638275146, "logits_per_char": -0.5945547819137573, "num_chars": 2}, {"sum_logits": -1.3835047483444214, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3835047483444214, "logits_per_char": -0.6917523741722107, "num_chars": 2}, {"sum_logits": -1.5809435844421387, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5809435844421387, "logits_per_char": -0.7904717922210693, "num_chars": 2}, {"sum_logits": -1.4623870849609375, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4623870849609375, "logits_per_char": -0.7311935424804688, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 612, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1147106885910034, "incorrect_loss_raw": 1.5171943108240764, "correct_loss_per_char": 0.5573553442955017, "incorrect_loss_per_char": 0.7585971554120382, "correct_loss_per_token": 1.1147106885910034, "incorrect_loss_per_token": 1.5171943108240764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1147106885910034, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -1.1147106885910034, "logits_per_char": -0.5573553442955017, "num_chars": 2}, {"sum_logits": -1.4745080471038818, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.4745080471038818, "logits_per_char": -0.7372540235519409, "num_chars": 2}, {"sum_logits": -1.7219290733337402, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7219290733337402, "logits_per_char": -0.8609645366668701, "num_chars": 2}, {"sum_logits": -1.355145812034607, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.355145812034607, "logits_per_char": -0.6775729060173035, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 613, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9415333271026611, "incorrect_loss_raw": 1.3256785869598389, "correct_loss_per_char": 0.9707666635513306, "incorrect_loss_per_char": 0.6628392934799194, "correct_loss_per_token": 1.9415333271026611, "incorrect_loss_per_token": 1.3256785869598389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.927717924118042, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -0.927717924118042, "logits_per_char": -0.463858962059021, "num_chars": 2}, {"sum_logits": -1.257056474685669, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.257056474685669, "logits_per_char": -0.6285282373428345, "num_chars": 2}, {"sum_logits": -1.9415333271026611, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.9415333271026611, "logits_per_char": -0.9707666635513306, "num_chars": 2}, {"sum_logits": -1.7922613620758057, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.7922613620758057, "logits_per_char": -0.8961306810379028, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 614, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0599204301834106, "incorrect_loss_raw": 1.5892166296641033, "correct_loss_per_char": 0.5299602150917053, "incorrect_loss_per_char": 0.7946083148320516, "correct_loss_per_token": 1.0599204301834106, "incorrect_loss_per_token": 1.5892166296641033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0599204301834106, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -1.0599204301834106, "logits_per_char": -0.5299602150917053, "num_chars": 2}, {"sum_logits": -1.1880522966384888, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.1880522966384888, "logits_per_char": -0.5940261483192444, "num_chars": 2}, {"sum_logits": -1.9878376722335815, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.9878376722335815, "logits_per_char": -0.9939188361167908, "num_chars": 2}, {"sum_logits": -1.5917599201202393, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.5917599201202393, "logits_per_char": -0.7958799600601196, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 615, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7047712802886963, "incorrect_loss_raw": 1.3125193913777669, "correct_loss_per_char": 0.8523856401443481, "incorrect_loss_per_char": 0.6562596956888834, "correct_loss_per_token": 1.7047712802886963, "incorrect_loss_per_token": 1.3125193913777669, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2953664064407349, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.2953664064407349, "logits_per_char": -0.6476832032203674, "num_chars": 2}, {"sum_logits": -1.206413984298706, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -1.206413984298706, "logits_per_char": -0.603206992149353, "num_chars": 2}, {"sum_logits": -1.7047712802886963, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.7047712802886963, "logits_per_char": -0.8523856401443481, "num_chars": 2}, {"sum_logits": -1.4357777833938599, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.4357777833938599, "logits_per_char": -0.7178888916969299, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 616, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9651370048522949, "incorrect_loss_raw": 1.655886212984721, "correct_loss_per_char": 0.48256850242614746, "incorrect_loss_per_char": 0.8279431064923605, "correct_loss_per_token": 0.9651370048522949, "incorrect_loss_per_token": 1.655886212984721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9651370048522949, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -0.9651370048522949, "logits_per_char": -0.48256850242614746, "num_chars": 2}, {"sum_logits": -1.190514326095581, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.190514326095581, "logits_per_char": -0.5952571630477905, "num_chars": 2}, {"sum_logits": -1.9814034700393677, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.9814034700393677, "logits_per_char": -0.9907017350196838, "num_chars": 2}, {"sum_logits": -1.7957408428192139, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.7957408428192139, "logits_per_char": -0.8978704214096069, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 617, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6928865909576416, "incorrect_loss_raw": 1.3944956064224243, "correct_loss_per_char": 0.8464432954788208, "incorrect_loss_per_char": 0.6972478032112122, "correct_loss_per_token": 1.6928865909576416, "incorrect_loss_per_token": 1.3944956064224243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9766017198562622, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.9766017198562622, "logits_per_char": -0.4883008599281311, "num_chars": 2}, {"sum_logits": -1.2310069799423218, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2310069799423218, "logits_per_char": -0.6155034899711609, "num_chars": 2}, {"sum_logits": -1.975878119468689, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.975878119468689, "logits_per_char": -0.9879390597343445, "num_chars": 2}, {"sum_logits": -1.6928865909576416, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.6928865909576416, "logits_per_char": -0.8464432954788208, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 618, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0588154792785645, "incorrect_loss_raw": 1.5440520445505779, "correct_loss_per_char": 0.5294077396392822, "incorrect_loss_per_char": 0.7720260222752889, "correct_loss_per_token": 1.0588154792785645, "incorrect_loss_per_token": 1.5440520445505779, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0588154792785645, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -1.0588154792785645, "logits_per_char": -0.5294077396392822, "num_chars": 2}, {"sum_logits": -1.4221348762512207, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.4221348762512207, "logits_per_char": -0.7110674381256104, "num_chars": 2}, {"sum_logits": -1.7353363037109375, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.7353363037109375, "logits_per_char": -0.8676681518554688, "num_chars": 2}, {"sum_logits": -1.4746849536895752, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.4746849536895752, "logits_per_char": -0.7373424768447876, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 619, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2287318706512451, "incorrect_loss_raw": 1.5838417212168376, "correct_loss_per_char": 0.6143659353256226, "incorrect_loss_per_char": 0.7919208606084188, "correct_loss_per_token": 1.2287318706512451, "incorrect_loss_per_token": 1.5838417212168376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9144079685211182, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -0.9144079685211182, "logits_per_char": -0.4572039842605591, "num_chars": 2}, {"sum_logits": -1.2287318706512451, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2287318706512451, "logits_per_char": -0.6143659353256226, "num_chars": 2}, {"sum_logits": -2.064636707305908, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -2.064636707305908, "logits_per_char": -1.032318353652954, "num_chars": 2}, {"sum_logits": -1.7724804878234863, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.7724804878234863, "logits_per_char": -0.8862402439117432, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 620, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.695211410522461, "incorrect_loss_raw": 1.33711842695872, "correct_loss_per_char": 0.8476057052612305, "incorrect_loss_per_char": 0.66855921347936, "correct_loss_per_token": 1.695211410522461, "incorrect_loss_per_token": 1.33711842695872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.084957242012024, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": true, "logits_per_token": -1.084957242012024, "logits_per_char": -0.542478621006012, "num_chars": 2}, {"sum_logits": -1.3138072490692139, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -1.3138072490692139, "logits_per_char": -0.6569036245346069, "num_chars": 2}, {"sum_logits": -1.695211410522461, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -1.695211410522461, "logits_per_char": -0.8476057052612305, "num_chars": 2}, {"sum_logits": -1.6125907897949219, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -1.6125907897949219, "logits_per_char": -0.8062953948974609, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 621, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0886480808258057, "incorrect_loss_raw": 1.527736783027649, "correct_loss_per_char": 0.5443240404129028, "incorrect_loss_per_char": 0.7638683915138245, "correct_loss_per_token": 1.0886480808258057, "incorrect_loss_per_token": 1.527736783027649, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0886480808258057, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.0886480808258057, "logits_per_char": -0.5443240404129028, "num_chars": 2}, {"sum_logits": -1.4167234897613525, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4167234897613525, "logits_per_char": -0.7083617448806763, "num_chars": 2}, {"sum_logits": -1.726154088973999, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.726154088973999, "logits_per_char": -0.8630770444869995, "num_chars": 2}, {"sum_logits": -1.4403327703475952, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4403327703475952, "logits_per_char": -0.7201663851737976, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 622, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8769246339797974, "incorrect_loss_raw": 1.2957566579182942, "correct_loss_per_char": 0.9384623169898987, "incorrect_loss_per_char": 0.6478783289591471, "correct_loss_per_token": 1.8769246339797974, "incorrect_loss_per_token": 1.2957566579182942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1012630462646484, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.1012630462646484, "logits_per_char": -0.5506315231323242, "num_chars": 2}, {"sum_logits": -1.196377158164978, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.196377158164978, "logits_per_char": -0.598188579082489, "num_chars": 2}, {"sum_logits": -1.8769246339797974, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.8769246339797974, "logits_per_char": -0.9384623169898987, "num_chars": 2}, {"sum_logits": -1.5896297693252563, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.5896297693252563, "logits_per_char": -0.7948148846626282, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 623, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8640539646148682, "incorrect_loss_raw": 1.2977396647135417, "correct_loss_per_char": 0.9320269823074341, "incorrect_loss_per_char": 0.6488698323567709, "correct_loss_per_token": 1.8640539646148682, "incorrect_loss_per_token": 1.2977396647135417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1281898021697998, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1281898021697998, "logits_per_char": -0.5640949010848999, "num_chars": 2}, {"sum_logits": -1.1725564002990723, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.1725564002990723, "logits_per_char": -0.5862782001495361, "num_chars": 2}, {"sum_logits": -1.8640539646148682, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.8640539646148682, "logits_per_char": -0.9320269823074341, "num_chars": 2}, {"sum_logits": -1.592472791671753, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.592472791671753, "logits_per_char": -0.7962363958358765, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 624, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4653232097625732, "incorrect_loss_raw": 1.389423926671346, "correct_loss_per_char": 0.7326616048812866, "incorrect_loss_per_char": 0.694711963335673, "correct_loss_per_token": 1.4653232097625732, "incorrect_loss_per_token": 1.389423926671346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.150338053703308, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.150338053703308, "logits_per_char": -0.575169026851654, "num_chars": 2}, {"sum_logits": -1.3822658061981201, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.3822658061981201, "logits_per_char": -0.6911329030990601, "num_chars": 2}, {"sum_logits": -1.6356679201126099, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6356679201126099, "logits_per_char": -0.8178339600563049, "num_chars": 2}, {"sum_logits": -1.4653232097625732, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4653232097625732, "logits_per_char": -0.7326616048812866, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 625, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.037999153137207, "incorrect_loss_raw": 1.2995807727177937, "correct_loss_per_char": 1.0189995765686035, "incorrect_loss_per_char": 0.6497903863588969, "correct_loss_per_token": 2.037999153137207, "incorrect_loss_per_token": 1.2995807727177937, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9684501886367798, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.9684501886367798, "logits_per_char": -0.4842250943183899, "num_chars": 2}, {"sum_logits": -1.1789276599884033, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.1789276599884033, "logits_per_char": -0.5894638299942017, "num_chars": 2}, {"sum_logits": -2.037999153137207, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.037999153137207, "logits_per_char": -1.0189995765686035, "num_chars": 2}, {"sum_logits": -1.7513644695281982, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.7513644695281982, "logits_per_char": -0.8756822347640991, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 626, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3891799449920654, "incorrect_loss_raw": 1.4402966499328613, "correct_loss_per_char": 0.6945899724960327, "incorrect_loss_per_char": 0.7201483249664307, "correct_loss_per_token": 1.3891799449920654, "incorrect_loss_per_token": 1.4402966499328613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0297486782073975, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -1.0297486782073975, "logits_per_char": -0.5148743391036987, "num_chars": 2}, {"sum_logits": -1.3891799449920654, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.3891799449920654, "logits_per_char": -0.6945899724960327, "num_chars": 2}, {"sum_logits": -1.6911511421203613, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.6911511421203613, "logits_per_char": -0.8455755710601807, "num_chars": 2}, {"sum_logits": -1.5999901294708252, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.5999901294708252, "logits_per_char": -0.7999950647354126, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 627, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0958986282348633, "incorrect_loss_raw": 1.561198075612386, "correct_loss_per_char": 0.5479493141174316, "incorrect_loss_per_char": 0.780599037806193, "correct_loss_per_token": 1.0958986282348633, "incorrect_loss_per_token": 1.561198075612386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0958986282348633, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.0958986282348633, "logits_per_char": -0.5479493141174316, "num_chars": 2}, {"sum_logits": -1.1963177919387817, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.1963177919387817, "logits_per_char": -0.5981588959693909, "num_chars": 2}, {"sum_logits": -1.8720550537109375, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.8720550537109375, "logits_per_char": -0.9360275268554688, "num_chars": 2}, {"sum_logits": -1.615221381187439, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.615221381187439, "logits_per_char": -0.8076106905937195, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 628, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1083544492721558, "incorrect_loss_raw": 1.5517496665318806, "correct_loss_per_char": 0.5541772246360779, "incorrect_loss_per_char": 0.7758748332659403, "correct_loss_per_token": 1.1083544492721558, "incorrect_loss_per_token": 1.5517496665318806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1083544492721558, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.1083544492721558, "logits_per_char": -0.5541772246360779, "num_chars": 2}, {"sum_logits": -1.227786898612976, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.227786898612976, "logits_per_char": -0.613893449306488, "num_chars": 2}, {"sum_logits": -1.8895293474197388, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.8895293474197388, "logits_per_char": -0.9447646737098694, "num_chars": 2}, {"sum_logits": -1.5379327535629272, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.5379327535629272, "logits_per_char": -0.7689663767814636, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 629, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2247802019119263, "incorrect_loss_raw": 1.4841117858886719, "correct_loss_per_char": 0.6123901009559631, "incorrect_loss_per_char": 0.7420558929443359, "correct_loss_per_token": 1.2247802019119263, "incorrect_loss_per_token": 1.4841117858886719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2247802019119263, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.2247802019119263, "logits_per_char": -0.6123901009559631, "num_chars": 2}, {"sum_logits": -1.2582409381866455, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2582409381866455, "logits_per_char": -0.6291204690933228, "num_chars": 2}, {"sum_logits": -1.8089519739151, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.8089519739151, "logits_per_char": -0.90447598695755, "num_chars": 2}, {"sum_logits": -1.38514244556427, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.38514244556427, "logits_per_char": -0.692571222782135, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 630, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0148468017578125, "incorrect_loss_raw": 1.5687776406606038, "correct_loss_per_char": 0.5074234008789062, "incorrect_loss_per_char": 0.7843888203303019, "correct_loss_per_token": 1.0148468017578125, "incorrect_loss_per_token": 1.5687776406606038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0148468017578125, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -1.0148468017578125, "logits_per_char": -0.5074234008789062, "num_chars": 2}, {"sum_logits": -1.4098048210144043, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.4098048210144043, "logits_per_char": -0.7049024105072021, "num_chars": 2}, {"sum_logits": -1.7162617444992065, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.7162617444992065, "logits_per_char": -0.8581308722496033, "num_chars": 2}, {"sum_logits": -1.5802663564682007, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5802663564682007, "logits_per_char": -0.7901331782341003, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 631, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9101359844207764, "incorrect_loss_raw": 1.3128763834635417, "correct_loss_per_char": 0.9550679922103882, "incorrect_loss_per_char": 0.6564381917317709, "correct_loss_per_token": 1.9101359844207764, "incorrect_loss_per_token": 1.3128763834635417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9982423782348633, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.9982423782348633, "logits_per_char": -0.49912118911743164, "num_chars": 2}, {"sum_logits": -1.21237313747406, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.21237313747406, "logits_per_char": -0.60618656873703, "num_chars": 2}, {"sum_logits": -1.9101359844207764, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.9101359844207764, "logits_per_char": -0.9550679922103882, "num_chars": 2}, {"sum_logits": -1.7280136346817017, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.7280136346817017, "logits_per_char": -0.8640068173408508, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 632, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9668280482292175, "incorrect_loss_raw": 1.6499604781468709, "correct_loss_per_char": 0.48341402411460876, "incorrect_loss_per_char": 0.8249802390734354, "correct_loss_per_token": 0.9668280482292175, "incorrect_loss_per_token": 1.6499604781468709, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9668280482292175, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.9668280482292175, "logits_per_char": -0.48341402411460876, "num_chars": 2}, {"sum_logits": -1.2363426685333252, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2363426685333252, "logits_per_char": -0.6181713342666626, "num_chars": 2}, {"sum_logits": -2.0690085887908936, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -2.0690085887908936, "logits_per_char": -1.0345042943954468, "num_chars": 2}, {"sum_logits": -1.644530177116394, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.644530177116394, "logits_per_char": -0.822265088558197, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 633, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0610696077346802, "incorrect_loss_raw": 1.5654933452606201, "correct_loss_per_char": 0.5305348038673401, "incorrect_loss_per_char": 0.7827466726303101, "correct_loss_per_token": 1.0610696077346802, "incorrect_loss_per_token": 1.5654933452606201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0610696077346802, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.0610696077346802, "logits_per_char": -0.5305348038673401, "num_chars": 2}, {"sum_logits": -1.2627562284469604, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.2627562284469604, "logits_per_char": -0.6313781142234802, "num_chars": 2}, {"sum_logits": -1.8443738222122192, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.8443738222122192, "logits_per_char": -0.9221869111061096, "num_chars": 2}, {"sum_logits": -1.5893499851226807, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.5893499851226807, "logits_per_char": -0.7946749925613403, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 634, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8591035604476929, "incorrect_loss_raw": 1.2909900347391765, "correct_loss_per_char": 0.9295517802238464, "incorrect_loss_per_char": 0.6454950173695883, "correct_loss_per_token": 1.8591035604476929, "incorrect_loss_per_token": 1.2909900347391765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1643904447555542, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.1643904447555542, "logits_per_char": -0.5821952223777771, "num_chars": 2}, {"sum_logits": -1.164066195487976, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -1.164066195487976, "logits_per_char": -0.582033097743988, "num_chars": 2}, {"sum_logits": -1.8591035604476929, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.8591035604476929, "logits_per_char": -0.9295517802238464, "num_chars": 2}, {"sum_logits": -1.544513463973999, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.544513463973999, "logits_per_char": -0.7722567319869995, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 635, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520329475402832, "incorrect_loss_raw": 1.3881744941075642, "correct_loss_per_char": 0.760164737701416, "incorrect_loss_per_char": 0.6940872470537821, "correct_loss_per_token": 1.520329475402832, "incorrect_loss_per_token": 1.3881744941075642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1504051685333252, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -1.1504051685333252, "logits_per_char": -0.5752025842666626, "num_chars": 2}, {"sum_logits": -1.2857451438903809, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.2857451438903809, "logits_per_char": -0.6428725719451904, "num_chars": 2}, {"sum_logits": -1.7283731698989868, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.7283731698989868, "logits_per_char": -0.8641865849494934, "num_chars": 2}, {"sum_logits": -1.520329475402832, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.520329475402832, "logits_per_char": -0.760164737701416, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 636, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8141683340072632, "incorrect_loss_raw": 1.3008766571680705, "correct_loss_per_char": 0.9070841670036316, "incorrect_loss_per_char": 0.6504383285840353, "correct_loss_per_token": 1.8141683340072632, "incorrect_loss_per_token": 1.3008766571680705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1478906869888306, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.1478906869888306, "logits_per_char": -0.5739453434944153, "num_chars": 2}, {"sum_logits": -1.2140496969223022, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2140496969223022, "logits_per_char": -0.6070248484611511, "num_chars": 2}, {"sum_logits": -1.8141683340072632, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8141683340072632, "logits_per_char": -0.9070841670036316, "num_chars": 2}, {"sum_logits": -1.5406895875930786, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.5406895875930786, "logits_per_char": -0.7703447937965393, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 637, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.171593189239502, "incorrect_loss_raw": 1.4934085210164387, "correct_loss_per_char": 0.585796594619751, "incorrect_loss_per_char": 0.7467042605082194, "correct_loss_per_token": 1.171593189239502, "incorrect_loss_per_token": 1.4934085210164387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.171593189239502, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.171593189239502, "logits_per_char": -0.585796594619751, "num_chars": 2}, {"sum_logits": -1.3639717102050781, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3639717102050781, "logits_per_char": -0.6819858551025391, "num_chars": 2}, {"sum_logits": -1.7056328058242798, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.7056328058242798, "logits_per_char": -0.8528164029121399, "num_chars": 2}, {"sum_logits": -1.4106210470199585, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4106210470199585, "logits_per_char": -0.7053105235099792, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 638, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6428864002227783, "incorrect_loss_raw": 1.3912458817164104, "correct_loss_per_char": 0.8214432001113892, "incorrect_loss_per_char": 0.6956229408582052, "correct_loss_per_token": 1.6428864002227783, "incorrect_loss_per_token": 1.3912458817164104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1226849555969238, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.1226849555969238, "logits_per_char": -0.5613424777984619, "num_chars": 2}, {"sum_logits": -1.1057900190353394, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.1057900190353394, "logits_per_char": -0.5528950095176697, "num_chars": 2}, {"sum_logits": -1.9452626705169678, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.9452626705169678, "logits_per_char": -0.9726313352584839, "num_chars": 2}, {"sum_logits": -1.6428864002227783, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.6428864002227783, "logits_per_char": -0.8214432001113892, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 639, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2487207651138306, "incorrect_loss_raw": 1.4951271613438923, "correct_loss_per_char": 0.6243603825569153, "incorrect_loss_per_char": 0.7475635806719462, "correct_loss_per_token": 1.2487207651138306, "incorrect_loss_per_token": 1.4951271613438923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.151483178138733, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.151483178138733, "logits_per_char": -0.5757415890693665, "num_chars": 2}, {"sum_logits": -1.2487207651138306, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2487207651138306, "logits_per_char": -0.6243603825569153, "num_chars": 2}, {"sum_logits": -1.9044004678726196, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.9044004678726196, "logits_per_char": -0.9522002339363098, "num_chars": 2}, {"sum_logits": -1.4294978380203247, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.4294978380203247, "logits_per_char": -0.7147489190101624, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 640, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6642875671386719, "incorrect_loss_raw": 1.4206605752309163, "correct_loss_per_char": 0.8321437835693359, "incorrect_loss_per_char": 0.7103302876154581, "correct_loss_per_token": 1.6642875671386719, "incorrect_loss_per_token": 1.4206605752309163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0669541358947754, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0669541358947754, "logits_per_char": -0.5334770679473877, "num_chars": 2}, {"sum_logits": -1.0892860889434814, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.0892860889434814, "logits_per_char": -0.5446430444717407, "num_chars": 2}, {"sum_logits": -2.105741500854492, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -2.105741500854492, "logits_per_char": -1.052870750427246, "num_chars": 2}, {"sum_logits": -1.6642875671386719, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.6642875671386719, "logits_per_char": -0.8321437835693359, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 641, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4479011297225952, "incorrect_loss_raw": 1.4029417037963867, "correct_loss_per_char": 0.7239505648612976, "incorrect_loss_per_char": 0.7014708518981934, "correct_loss_per_token": 1.4479011297225952, "incorrect_loss_per_token": 1.4029417037963867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1255927085876465, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.1255927085876465, "logits_per_char": -0.5627963542938232, "num_chars": 2}, {"sum_logits": -1.4479011297225952, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4479011297225952, "logits_per_char": -0.7239505648612976, "num_chars": 2}, {"sum_logits": -1.6985174417495728, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6985174417495728, "logits_per_char": -0.8492587208747864, "num_chars": 2}, {"sum_logits": -1.384714961051941, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.384714961051941, "logits_per_char": -0.6923574805259705, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 642, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9225702285766602, "incorrect_loss_raw": 1.2770450115203857, "correct_loss_per_char": 0.9612851142883301, "incorrect_loss_per_char": 0.6385225057601929, "correct_loss_per_token": 1.9225702285766602, "incorrect_loss_per_token": 1.2770450115203857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1468919515609741, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.1468919515609741, "logits_per_char": -0.5734459757804871, "num_chars": 2}, {"sum_logits": -1.2093443870544434, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2093443870544434, "logits_per_char": -0.6046721935272217, "num_chars": 2}, {"sum_logits": -1.9225702285766602, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.9225702285766602, "logits_per_char": -0.9612851142883301, "num_chars": 2}, {"sum_logits": -1.4748986959457397, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.4748986959457397, "logits_per_char": -0.7374493479728699, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 643, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.86802077293396, "incorrect_loss_raw": 1.295102874437968, "correct_loss_per_char": 0.93401038646698, "incorrect_loss_per_char": 0.647551437218984, "correct_loss_per_token": 1.86802077293396, "incorrect_loss_per_token": 1.295102874437968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2021403312683105, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.2021403312683105, "logits_per_char": -0.6010701656341553, "num_chars": 2}, {"sum_logits": -1.1316758394241333, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": true, "logits_per_token": -1.1316758394241333, "logits_per_char": -0.5658379197120667, "num_chars": 2}, {"sum_logits": -1.86802077293396, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.86802077293396, "logits_per_char": -0.93401038646698, "num_chars": 2}, {"sum_logits": -1.55149245262146, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.55149245262146, "logits_per_char": -0.77574622631073, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 644, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7700462341308594, "incorrect_loss_raw": 1.388990084330241, "correct_loss_per_char": 0.8850231170654297, "incorrect_loss_per_char": 0.6944950421651205, "correct_loss_per_token": 1.7700462341308594, "incorrect_loss_per_token": 1.388990084330241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.914474606513977, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.914474606513977, "logits_per_char": -0.4572373032569885, "num_chars": 2}, {"sum_logits": -1.2586250305175781, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.2586250305175781, "logits_per_char": -0.6293125152587891, "num_chars": 2}, {"sum_logits": -1.9938706159591675, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.9938706159591675, "logits_per_char": -0.9969353079795837, "num_chars": 2}, {"sum_logits": -1.7700462341308594, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.7700462341308594, "logits_per_char": -0.8850231170654297, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 645, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7153366804122925, "incorrect_loss_raw": 1.3225359916687012, "correct_loss_per_char": 0.8576683402061462, "incorrect_loss_per_char": 0.6612679958343506, "correct_loss_per_token": 1.7153366804122925, "incorrect_loss_per_token": 1.3225359916687012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2401363849639893, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.2401363849639893, "logits_per_char": -0.6200681924819946, "num_chars": 2}, {"sum_logits": -1.16976797580719, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -1.16976797580719, "logits_per_char": -0.584883987903595, "num_chars": 2}, {"sum_logits": -1.7153366804122925, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.7153366804122925, "logits_per_char": -0.8576683402061462, "num_chars": 2}, {"sum_logits": -1.5577036142349243, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.5577036142349243, "logits_per_char": -0.7788518071174622, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 646, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.098231554031372, "incorrect_loss_raw": 1.5260157187779744, "correct_loss_per_char": 0.549115777015686, "incorrect_loss_per_char": 0.7630078593889872, "correct_loss_per_token": 1.098231554031372, "incorrect_loss_per_token": 1.5260157187779744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.098231554031372, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.098231554031372, "logits_per_char": -0.549115777015686, "num_chars": 2}, {"sum_logits": -1.3748044967651367, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.3748044967651367, "logits_per_char": -0.6874022483825684, "num_chars": 2}, {"sum_logits": -1.6961748600006104, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.6961748600006104, "logits_per_char": -0.8480874300003052, "num_chars": 2}, {"sum_logits": -1.5070677995681763, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.5070677995681763, "logits_per_char": -0.7535338997840881, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 647, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1397674083709717, "incorrect_loss_raw": 1.522512714068095, "correct_loss_per_char": 0.5698837041854858, "incorrect_loss_per_char": 0.7612563570340475, "correct_loss_per_token": 1.1397674083709717, "incorrect_loss_per_token": 1.522512714068095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2205562591552734, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2205562591552734, "logits_per_char": -0.6102781295776367, "num_chars": 2}, {"sum_logits": -1.1397674083709717, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.1397674083709717, "logits_per_char": -0.5698837041854858, "num_chars": 2}, {"sum_logits": -1.7844449281692505, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.7844449281692505, "logits_per_char": -0.8922224640846252, "num_chars": 2}, {"sum_logits": -1.5625369548797607, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.5625369548797607, "logits_per_char": -0.7812684774398804, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 648, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7401111125946045, "incorrect_loss_raw": 1.3190891742706299, "correct_loss_per_char": 0.8700555562973022, "incorrect_loss_per_char": 0.6595445871353149, "correct_loss_per_token": 1.7401111125946045, "incorrect_loss_per_token": 1.3190891742706299, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1158440113067627, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.1158440113067627, "logits_per_char": -0.5579220056533813, "num_chars": 2}, {"sum_logits": -1.2830102443695068, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2830102443695068, "logits_per_char": -0.6415051221847534, "num_chars": 2}, {"sum_logits": -1.7401111125946045, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.7401111125946045, "logits_per_char": -0.8700555562973022, "num_chars": 2}, {"sum_logits": -1.5584132671356201, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5584132671356201, "logits_per_char": -0.7792066335678101, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 649, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0798580646514893, "incorrect_loss_raw": 1.570394515991211, "correct_loss_per_char": 0.5399290323257446, "incorrect_loss_per_char": 0.7851972579956055, "correct_loss_per_token": 1.0798580646514893, "incorrect_loss_per_token": 1.570394515991211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0798580646514893, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0798580646514893, "logits_per_char": -0.5399290323257446, "num_chars": 2}, {"sum_logits": -1.2062957286834717, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2062957286834717, "logits_per_char": -0.6031478643417358, "num_chars": 2}, {"sum_logits": -1.7825281620025635, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.7825281620025635, "logits_per_char": -0.8912640810012817, "num_chars": 2}, {"sum_logits": -1.7223596572875977, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.7223596572875977, "logits_per_char": -0.8611798286437988, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 650, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6878478527069092, "incorrect_loss_raw": 1.317655881245931, "correct_loss_per_char": 0.8439239263534546, "incorrect_loss_per_char": 0.6588279406229655, "correct_loss_per_token": 1.6878478527069092, "incorrect_loss_per_token": 1.317655881245931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1713929176330566, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.1713929176330566, "logits_per_char": -0.5856964588165283, "num_chars": 2}, {"sum_logits": -1.3295812606811523, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.3295812606811523, "logits_per_char": -0.6647906303405762, "num_chars": 2}, {"sum_logits": -1.6878478527069092, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.6878478527069092, "logits_per_char": -0.8439239263534546, "num_chars": 2}, {"sum_logits": -1.451993465423584, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.451993465423584, "logits_per_char": -0.725996732711792, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 651, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1384143829345703, "incorrect_loss_raw": 1.2784865697224934, "correct_loss_per_char": 1.0692071914672852, "incorrect_loss_per_char": 0.6392432848612467, "correct_loss_per_token": 2.1384143829345703, "incorrect_loss_per_token": 1.2784865697224934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.014230728149414, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": true, "logits_per_token": -1.014230728149414, "logits_per_char": -0.507115364074707, "num_chars": 2}, {"sum_logits": -1.1078120470046997, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -1.1078120470046997, "logits_per_char": -0.5539060235023499, "num_chars": 2}, {"sum_logits": -2.1384143829345703, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -2.1384143829345703, "logits_per_char": -1.0692071914672852, "num_chars": 2}, {"sum_logits": -1.7134169340133667, "num_tokens": 1, "num_tokens_all": 1133, "is_greedy": false, "logits_per_token": -1.7134169340133667, "logits_per_char": -0.8567084670066833, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 652, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.306631088256836, "incorrect_loss_raw": 1.4472222725550334, "correct_loss_per_char": 0.653315544128418, "incorrect_loss_per_char": 0.7236111362775167, "correct_loss_per_token": 1.306631088256836, "incorrect_loss_per_token": 1.4472222725550334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1639833450317383, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.1639833450317383, "logits_per_char": -0.5819916725158691, "num_chars": 2}, {"sum_logits": -1.306631088256836, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.306631088256836, "logits_per_char": -0.653315544128418, "num_chars": 2}, {"sum_logits": -1.6290125846862793, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6290125846862793, "logits_per_char": -0.8145062923431396, "num_chars": 2}, {"sum_logits": -1.5486708879470825, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.5486708879470825, "logits_per_char": -0.7743354439735413, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 653, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9925849437713623, "incorrect_loss_raw": 1.318956196308136, "correct_loss_per_char": 0.9962924718856812, "incorrect_loss_per_char": 0.659478098154068, "correct_loss_per_token": 1.9925849437713623, "incorrect_loss_per_token": 1.318956196308136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9912919402122498, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -0.9912919402122498, "logits_per_char": -0.4956459701061249, "num_chars": 2}, {"sum_logits": -1.1241710186004639, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.1241710186004639, "logits_per_char": -0.5620855093002319, "num_chars": 2}, {"sum_logits": -1.9925849437713623, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.9925849437713623, "logits_per_char": -0.9962924718856812, "num_chars": 2}, {"sum_logits": -1.8414056301116943, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.8414056301116943, "logits_per_char": -0.9207028150558472, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 654, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2559597492218018, "incorrect_loss_raw": 1.5498865644137065, "correct_loss_per_char": 0.6279798746109009, "incorrect_loss_per_char": 0.7749432822068533, "correct_loss_per_token": 1.2559597492218018, "incorrect_loss_per_token": 1.5498865644137065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9509521126747131, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.9509521126747131, "logits_per_char": -0.47547605633735657, "num_chars": 2}, {"sum_logits": -1.2559597492218018, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.2559597492218018, "logits_per_char": -0.6279798746109009, "num_chars": 2}, {"sum_logits": -1.9961791038513184, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.9961791038513184, "logits_per_char": -0.9980895519256592, "num_chars": 2}, {"sum_logits": -1.702528476715088, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.702528476715088, "logits_per_char": -0.851264238357544, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 655, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2410533428192139, "incorrect_loss_raw": 1.5667408307393391, "correct_loss_per_char": 0.6205266714096069, "incorrect_loss_per_char": 0.7833704153696696, "correct_loss_per_token": 1.2410533428192139, "incorrect_loss_per_token": 1.5667408307393391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9341452121734619, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.9341452121734619, "logits_per_char": -0.46707260608673096, "num_chars": 2}, {"sum_logits": -1.2410533428192139, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.2410533428192139, "logits_per_char": -0.6205266714096069, "num_chars": 2}, {"sum_logits": -2.026553153991699, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -2.026553153991699, "logits_per_char": -1.0132765769958496, "num_chars": 2}, {"sum_logits": -1.7395241260528564, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.7395241260528564, "logits_per_char": -0.8697620630264282, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 656, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1246627569198608, "incorrect_loss_raw": 1.5135465065638225, "correct_loss_per_char": 0.5623313784599304, "incorrect_loss_per_char": 0.7567732532819113, "correct_loss_per_token": 1.1246627569198608, "incorrect_loss_per_token": 1.5135465065638225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1246627569198608, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.1246627569198608, "logits_per_char": -0.5623313784599304, "num_chars": 2}, {"sum_logits": -1.4151729345321655, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4151729345321655, "logits_per_char": -0.7075864672660828, "num_chars": 2}, {"sum_logits": -1.7334188222885132, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.7334188222885132, "logits_per_char": -0.8667094111442566, "num_chars": 2}, {"sum_logits": -1.3920477628707886, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.3920477628707886, "logits_per_char": -0.6960238814353943, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 657, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.64536452293396, "incorrect_loss_raw": 1.344327171643575, "correct_loss_per_char": 0.82268226146698, "incorrect_loss_per_char": 0.6721635858217875, "correct_loss_per_token": 1.64536452293396, "incorrect_loss_per_token": 1.344327171643575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0845398902893066, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.0845398902893066, "logits_per_char": -0.5422699451446533, "num_chars": 2}, {"sum_logits": -1.3686983585357666, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.3686983585357666, "logits_per_char": -0.6843491792678833, "num_chars": 2}, {"sum_logits": -1.64536452293396, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.64536452293396, "logits_per_char": -0.82268226146698, "num_chars": 2}, {"sum_logits": -1.5797432661056519, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5797432661056519, "logits_per_char": -0.7898716330528259, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 658, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1095002889633179, "incorrect_loss_raw": 1.5458942651748657, "correct_loss_per_char": 0.5547501444816589, "incorrect_loss_per_char": 0.7729471325874329, "correct_loss_per_token": 1.1095002889633179, "incorrect_loss_per_token": 1.5458942651748657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1095002889633179, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.1095002889633179, "logits_per_char": -0.5547501444816589, "num_chars": 2}, {"sum_logits": -1.2428219318389893, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.2428219318389893, "logits_per_char": -0.6214109659194946, "num_chars": 2}, {"sum_logits": -1.8493239879608154, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.8493239879608154, "logits_per_char": -0.9246619939804077, "num_chars": 2}, {"sum_logits": -1.5455368757247925, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.5455368757247925, "logits_per_char": -0.7727684378623962, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 659, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1646848917007446, "incorrect_loss_raw": 1.5531362295150757, "correct_loss_per_char": 0.5823424458503723, "incorrect_loss_per_char": 0.7765681147575378, "correct_loss_per_token": 1.1646848917007446, "incorrect_loss_per_token": 1.5531362295150757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.065167784690857, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.065167784690857, "logits_per_char": -0.5325838923454285, "num_chars": 2}, {"sum_logits": -1.1646848917007446, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1646848917007446, "logits_per_char": -0.5823424458503723, "num_chars": 2}, {"sum_logits": -1.9405921697616577, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9405921697616577, "logits_per_char": -0.9702960848808289, "num_chars": 2}, {"sum_logits": -1.6536487340927124, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.6536487340927124, "logits_per_char": -0.8268243670463562, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 660, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1084972620010376, "incorrect_loss_raw": 1.542798678080241, "correct_loss_per_char": 0.5542486310005188, "incorrect_loss_per_char": 0.7713993390401205, "correct_loss_per_token": 1.1084972620010376, "incorrect_loss_per_token": 1.542798678080241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1084972620010376, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -1.1084972620010376, "logits_per_char": -0.5542486310005188, "num_chars": 2}, {"sum_logits": -1.211229920387268, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.211229920387268, "logits_per_char": -0.605614960193634, "num_chars": 2}, {"sum_logits": -1.7248615026474, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.7248615026474, "logits_per_char": -0.8624307513237, "num_chars": 2}, {"sum_logits": -1.6923046112060547, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.6923046112060547, "logits_per_char": -0.8461523056030273, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 661, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5680913925170898, "incorrect_loss_raw": 1.3760747114817302, "correct_loss_per_char": 0.7840456962585449, "incorrect_loss_per_char": 0.6880373557408651, "correct_loss_per_token": 1.5680913925170898, "incorrect_loss_per_token": 1.3760747114817302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1318954229354858, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.1318954229354858, "logits_per_char": -0.5659477114677429, "num_chars": 2}, {"sum_logits": -1.239937663078308, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.239937663078308, "logits_per_char": -0.619968831539154, "num_chars": 2}, {"sum_logits": -1.7563910484313965, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.7563910484313965, "logits_per_char": -0.8781955242156982, "num_chars": 2}, {"sum_logits": -1.5680913925170898, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.5680913925170898, "logits_per_char": -0.7840456962585449, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 662, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.066638469696045, "incorrect_loss_raw": 1.2966128985087078, "correct_loss_per_char": 1.0333192348480225, "incorrect_loss_per_char": 0.6483064492543539, "correct_loss_per_token": 2.066638469696045, "incorrect_loss_per_token": 1.2966128985087078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8830417394638062, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -0.8830417394638062, "logits_per_char": -0.4415208697319031, "num_chars": 2}, {"sum_logits": -1.3188730478286743, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.3188730478286743, "logits_per_char": -0.6594365239143372, "num_chars": 2}, {"sum_logits": -2.066638469696045, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -2.066638469696045, "logits_per_char": -1.0333192348480225, "num_chars": 2}, {"sum_logits": -1.6879239082336426, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.6879239082336426, "logits_per_char": -0.8439619541168213, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 663, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8211913108825684, "incorrect_loss_raw": 1.2912149826685588, "correct_loss_per_char": 0.9105956554412842, "incorrect_loss_per_char": 0.6456074913342794, "correct_loss_per_token": 1.8211913108825684, "incorrect_loss_per_token": 1.2912149826685588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2240769863128662, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.2240769863128662, "logits_per_char": -0.6120384931564331, "num_chars": 2}, {"sum_logits": -1.1738929748535156, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -1.1738929748535156, "logits_per_char": -0.5869464874267578, "num_chars": 2}, {"sum_logits": -1.8211913108825684, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.8211913108825684, "logits_per_char": -0.9105956554412842, "num_chars": 2}, {"sum_logits": -1.4756749868392944, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.4756749868392944, "logits_per_char": -0.7378374934196472, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 664, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9669504165649414, "incorrect_loss_raw": 1.3276734153429668, "correct_loss_per_char": 0.9834752082824707, "incorrect_loss_per_char": 0.6638367076714834, "correct_loss_per_token": 1.9669504165649414, "incorrect_loss_per_token": 1.3276734153429668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8702110648155212, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -0.8702110648155212, "logits_per_char": -0.4351055324077606, "num_chars": 2}, {"sum_logits": -1.3481770753860474, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.3481770753860474, "logits_per_char": -0.6740885376930237, "num_chars": 2}, {"sum_logits": -1.9669504165649414, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.9669504165649414, "logits_per_char": -0.9834752082824707, "num_chars": 2}, {"sum_logits": -1.7646321058273315, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.7646321058273315, "logits_per_char": -0.8823160529136658, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 665, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7014435529708862, "incorrect_loss_raw": 1.3564846118291218, "correct_loss_per_char": 0.8507217764854431, "incorrect_loss_per_char": 0.6782423059145609, "correct_loss_per_token": 1.7014435529708862, "incorrect_loss_per_token": 1.3564846118291218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0463969707489014, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -1.0463969707489014, "logits_per_char": -0.5231984853744507, "num_chars": 2}, {"sum_logits": -1.2456817626953125, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.2456817626953125, "logits_per_char": -0.6228408813476562, "num_chars": 2}, {"sum_logits": -1.7773751020431519, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.7773751020431519, "logits_per_char": -0.8886875510215759, "num_chars": 2}, {"sum_logits": -1.7014435529708862, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.7014435529708862, "logits_per_char": -0.8507217764854431, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 666, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.691270351409912, "incorrect_loss_raw": 1.3717239300409954, "correct_loss_per_char": 0.845635175704956, "incorrect_loss_per_char": 0.6858619650204977, "correct_loss_per_token": 1.691270351409912, "incorrect_loss_per_token": 1.3717239300409954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.147806167602539, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": false, "logits_per_token": -1.147806167602539, "logits_per_char": -0.5739030838012695, "num_chars": 2}, {"sum_logits": -1.0938347578048706, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": true, "logits_per_token": -1.0938347578048706, "logits_per_char": -0.5469173789024353, "num_chars": 2}, {"sum_logits": -1.8735308647155762, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": false, "logits_per_token": -1.8735308647155762, "logits_per_char": -0.9367654323577881, "num_chars": 2}, {"sum_logits": -1.691270351409912, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": false, "logits_per_token": -1.691270351409912, "logits_per_char": -0.845635175704956, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 667, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5449270009994507, "incorrect_loss_raw": 1.3512070973714192, "correct_loss_per_char": 0.7724635004997253, "incorrect_loss_per_char": 0.6756035486857096, "correct_loss_per_token": 1.5449270009994507, "incorrect_loss_per_token": 1.3512070973714192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2969460487365723, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -1.2969460487365723, "logits_per_char": -0.6484730243682861, "num_chars": 2}, {"sum_logits": -1.3037241697311401, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.3037241697311401, "logits_per_char": -0.6518620848655701, "num_chars": 2}, {"sum_logits": -1.5449270009994507, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.5449270009994507, "logits_per_char": -0.7724635004997253, "num_chars": 2}, {"sum_logits": -1.4529510736465454, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.4529510736465454, "logits_per_char": -0.7264755368232727, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 668, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9232170581817627, "incorrect_loss_raw": 1.3043589194615681, "correct_loss_per_char": 0.9616085290908813, "incorrect_loss_per_char": 0.6521794597307841, "correct_loss_per_token": 1.9232170581817627, "incorrect_loss_per_token": 1.3043589194615681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.075812578201294, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.075812578201294, "logits_per_char": -0.537906289100647, "num_chars": 2}, {"sum_logits": -1.1325640678405762, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1325640678405762, "logits_per_char": -0.5662820339202881, "num_chars": 2}, {"sum_logits": -1.9232170581817627, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.9232170581817627, "logits_per_char": -0.9616085290908813, "num_chars": 2}, {"sum_logits": -1.7047001123428345, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.7047001123428345, "logits_per_char": -0.8523500561714172, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 669, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0936537981033325, "incorrect_loss_raw": 1.5348867972691853, "correct_loss_per_char": 0.5468268990516663, "incorrect_loss_per_char": 0.7674433986345927, "correct_loss_per_token": 1.0936537981033325, "incorrect_loss_per_token": 1.5348867972691853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0936537981033325, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -1.0936537981033325, "logits_per_char": -0.5468268990516663, "num_chars": 2}, {"sum_logits": -1.3706663846969604, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3706663846969604, "logits_per_char": -0.6853331923484802, "num_chars": 2}, {"sum_logits": -1.8052080869674683, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.8052080869674683, "logits_per_char": -0.9026040434837341, "num_chars": 2}, {"sum_logits": -1.4287859201431274, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.4287859201431274, "logits_per_char": -0.7143929600715637, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 670, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9697805643081665, "incorrect_loss_raw": 1.2899991472562153, "correct_loss_per_char": 0.9848902821540833, "incorrect_loss_per_char": 0.6449995736281077, "correct_loss_per_token": 1.9697805643081665, "incorrect_loss_per_token": 1.2899991472562153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9982369542121887, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -0.9982369542121887, "logits_per_char": -0.49911847710609436, "num_chars": 2}, {"sum_logits": -1.2549444437026978, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2549444437026978, "logits_per_char": -0.6274722218513489, "num_chars": 2}, {"sum_logits": -1.9697805643081665, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.9697805643081665, "logits_per_char": -0.9848902821540833, "num_chars": 2}, {"sum_logits": -1.6168160438537598, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.6168160438537598, "logits_per_char": -0.8084080219268799, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 671, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6386123895645142, "incorrect_loss_raw": 1.3558801809946697, "correct_loss_per_char": 0.8193061947822571, "incorrect_loss_per_char": 0.6779400904973348, "correct_loss_per_token": 1.6386123895645142, "incorrect_loss_per_token": 1.3558801809946697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1830909252166748, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.1830909252166748, "logits_per_char": -0.5915454626083374, "num_chars": 2}, {"sum_logits": -1.1676092147827148, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.1676092147827148, "logits_per_char": -0.5838046073913574, "num_chars": 2}, {"sum_logits": -1.6386123895645142, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.6386123895645142, "logits_per_char": -0.8193061947822571, "num_chars": 2}, {"sum_logits": -1.7169404029846191, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.7169404029846191, "logits_per_char": -0.8584702014923096, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 672, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0557100772857666, "incorrect_loss_raw": 1.5984632174173992, "correct_loss_per_char": 0.5278550386428833, "incorrect_loss_per_char": 0.7992316087086996, "correct_loss_per_token": 1.0557100772857666, "incorrect_loss_per_token": 1.5984632174173992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0557100772857666, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": true, "logits_per_token": -1.0557100772857666, "logits_per_char": -0.5278550386428833, "num_chars": 2}, {"sum_logits": -1.1492146253585815, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.1492146253585815, "logits_per_char": -0.5746073126792908, "num_chars": 2}, {"sum_logits": -1.972361445426941, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.972361445426941, "logits_per_char": -0.9861807227134705, "num_chars": 2}, {"sum_logits": -1.6738135814666748, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.6738135814666748, "logits_per_char": -0.8369067907333374, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 673, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2503430843353271, "incorrect_loss_raw": 1.5320100386937459, "correct_loss_per_char": 0.6251715421676636, "incorrect_loss_per_char": 0.7660050193468729, "correct_loss_per_token": 1.2503430843353271, "incorrect_loss_per_token": 1.5320100386937459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0103861093521118, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -1.0103861093521118, "logits_per_char": -0.5051930546760559, "num_chars": 2}, {"sum_logits": -1.2503430843353271, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.2503430843353271, "logits_per_char": -0.6251715421676636, "num_chars": 2}, {"sum_logits": -2.0162577629089355, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -2.0162577629089355, "logits_per_char": -1.0081288814544678, "num_chars": 2}, {"sum_logits": -1.5693862438201904, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.5693862438201904, "logits_per_char": -0.7846931219100952, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 674, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4308483600616455, "incorrect_loss_raw": 1.4599391222000122, "correct_loss_per_char": 0.7154241800308228, "incorrect_loss_per_char": 0.7299695611000061, "correct_loss_per_token": 1.4308483600616455, "incorrect_loss_per_token": 1.4599391222000122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0671541690826416, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -1.0671541690826416, "logits_per_char": -0.5335770845413208, "num_chars": 2}, {"sum_logits": -1.2741204500198364, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.2741204500198364, "logits_per_char": -0.6370602250099182, "num_chars": 2}, {"sum_logits": -2.0385427474975586, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -2.0385427474975586, "logits_per_char": -1.0192713737487793, "num_chars": 2}, {"sum_logits": -1.4308483600616455, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.4308483600616455, "logits_per_char": -0.7154241800308228, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 675, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5945172309875488, "incorrect_loss_raw": 1.4104022582372029, "correct_loss_per_char": 0.7972586154937744, "incorrect_loss_per_char": 0.7052011291186014, "correct_loss_per_token": 1.5945172309875488, "incorrect_loss_per_token": 1.4104022582372029, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.09292733669281, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.09292733669281, "logits_per_char": -0.546463668346405, "num_chars": 2}, {"sum_logits": -1.1706092357635498, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.1706092357635498, "logits_per_char": -0.5853046178817749, "num_chars": 2}, {"sum_logits": -1.967670202255249, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.967670202255249, "logits_per_char": -0.9838351011276245, "num_chars": 2}, {"sum_logits": -1.5945172309875488, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.5945172309875488, "logits_per_char": -0.7972586154937744, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 676, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7336488962173462, "incorrect_loss_raw": 1.312289039293925, "correct_loss_per_char": 0.8668244481086731, "incorrect_loss_per_char": 0.6561445196469625, "correct_loss_per_token": 1.7336488962173462, "incorrect_loss_per_token": 1.312289039293925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1233806610107422, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.1233806610107422, "logits_per_char": -0.5616903305053711, "num_chars": 2}, {"sum_logits": -1.4161185026168823, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4161185026168823, "logits_per_char": -0.7080592513084412, "num_chars": 2}, {"sum_logits": -1.7336488962173462, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.7336488962173462, "logits_per_char": -0.8668244481086731, "num_chars": 2}, {"sum_logits": -1.3973679542541504, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.3973679542541504, "logits_per_char": -0.6986839771270752, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 677, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0829499959945679, "incorrect_loss_raw": 1.5375601053237915, "correct_loss_per_char": 0.5414749979972839, "incorrect_loss_per_char": 0.7687800526618958, "correct_loss_per_token": 1.0829499959945679, "incorrect_loss_per_token": 1.5375601053237915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0829499959945679, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -1.0829499959945679, "logits_per_char": -0.5414749979972839, "num_chars": 2}, {"sum_logits": -1.3460406064987183, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.3460406064987183, "logits_per_char": -0.6730203032493591, "num_chars": 2}, {"sum_logits": -1.7540361881256104, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.7540361881256104, "logits_per_char": -0.8770180940628052, "num_chars": 2}, {"sum_logits": -1.512603521347046, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.512603521347046, "logits_per_char": -0.756301760673523, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 678, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9685518741607666, "incorrect_loss_raw": 1.2928378979365032, "correct_loss_per_char": 0.9842759370803833, "incorrect_loss_per_char": 0.6464189489682516, "correct_loss_per_token": 1.9685518741607666, "incorrect_loss_per_token": 1.2928378979365032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0356138944625854, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": true, "logits_per_token": -1.0356138944625854, "logits_per_char": -0.5178069472312927, "num_chars": 2}, {"sum_logits": -1.1912221908569336, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.1912221908569336, "logits_per_char": -0.5956110954284668, "num_chars": 2}, {"sum_logits": -1.9685518741607666, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.9685518741607666, "logits_per_char": -0.9842759370803833, "num_chars": 2}, {"sum_logits": -1.6516776084899902, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.6516776084899902, "logits_per_char": -0.8258388042449951, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 679, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6258655786514282, "incorrect_loss_raw": 1.3479256232579548, "correct_loss_per_char": 0.8129327893257141, "incorrect_loss_per_char": 0.6739628116289774, "correct_loss_per_token": 1.6258655786514282, "incorrect_loss_per_token": 1.3479256232579548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0837724208831787, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -1.0837724208831787, "logits_per_char": -0.5418862104415894, "num_chars": 2}, {"sum_logits": -1.3965532779693604, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3965532779693604, "logits_per_char": -0.6982766389846802, "num_chars": 2}, {"sum_logits": -1.6258655786514282, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6258655786514282, "logits_per_char": -0.8129327893257141, "num_chars": 2}, {"sum_logits": -1.5634511709213257, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.5634511709213257, "logits_per_char": -0.7817255854606628, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 680, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.207453727722168, "incorrect_loss_raw": 1.5209678411483765, "correct_loss_per_char": 0.603726863861084, "incorrect_loss_per_char": 0.7604839205741882, "correct_loss_per_token": 1.207453727722168, "incorrect_loss_per_token": 1.5209678411483765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.078895092010498, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.078895092010498, "logits_per_char": -0.539447546005249, "num_chars": 2}, {"sum_logits": -1.207453727722168, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.207453727722168, "logits_per_char": -0.603726863861084, "num_chars": 2}, {"sum_logits": -1.8667628765106201, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.8667628765106201, "logits_per_char": -0.9333814382553101, "num_chars": 2}, {"sum_logits": -1.6172455549240112, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.6172455549240112, "logits_per_char": -0.8086227774620056, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 681, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7216671705245972, "incorrect_loss_raw": 1.3242961168289185, "correct_loss_per_char": 0.8608335852622986, "incorrect_loss_per_char": 0.6621480584144592, "correct_loss_per_token": 1.7216671705245972, "incorrect_loss_per_token": 1.3242961168289185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1095294952392578, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -1.1095294952392578, "logits_per_char": -0.5547647476196289, "num_chars": 2}, {"sum_logits": -1.2829368114471436, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.2829368114471436, "logits_per_char": -0.6414684057235718, "num_chars": 2}, {"sum_logits": -1.7216671705245972, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.7216671705245972, "logits_per_char": -0.8608335852622986, "num_chars": 2}, {"sum_logits": -1.580422043800354, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.580422043800354, "logits_per_char": -0.790211021900177, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 682, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6190389394760132, "incorrect_loss_raw": 1.3370225429534912, "correct_loss_per_char": 0.8095194697380066, "incorrect_loss_per_char": 0.6685112714767456, "correct_loss_per_token": 1.6190389394760132, "incorrect_loss_per_token": 1.3370225429534912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2370425462722778, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -1.2370425462722778, "logits_per_char": -0.6185212731361389, "num_chars": 2}, {"sum_logits": -1.2858004570007324, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.2858004570007324, "logits_per_char": -0.6429002285003662, "num_chars": 2}, {"sum_logits": -1.6190389394760132, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6190389394760132, "logits_per_char": -0.8095194697380066, "num_chars": 2}, {"sum_logits": -1.4882246255874634, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.4882246255874634, "logits_per_char": -0.7441123127937317, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 683, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9314073920249939, "incorrect_loss_raw": 1.6486610571543376, "correct_loss_per_char": 0.46570369601249695, "incorrect_loss_per_char": 0.8243305285771688, "correct_loss_per_token": 0.9314073920249939, "incorrect_loss_per_token": 1.6486610571543376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9314073920249939, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.9314073920249939, "logits_per_char": -0.46570369601249695, "num_chars": 2}, {"sum_logits": -1.348171591758728, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.348171591758728, "logits_per_char": -0.674085795879364, "num_chars": 2}, {"sum_logits": -1.8785607814788818, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.8785607814788818, "logits_per_char": -0.9392803907394409, "num_chars": 2}, {"sum_logits": -1.7192507982254028, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.7192507982254028, "logits_per_char": -0.8596253991127014, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 684, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2325884103775024, "incorrect_loss_raw": 1.4832793076833088, "correct_loss_per_char": 0.6162942051887512, "incorrect_loss_per_char": 0.7416396538416544, "correct_loss_per_token": 1.2325884103775024, "incorrect_loss_per_token": 1.4832793076833088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1896191835403442, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -1.1896191835403442, "logits_per_char": -0.5948095917701721, "num_chars": 2}, {"sum_logits": -1.2325884103775024, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2325884103775024, "logits_per_char": -0.6162942051887512, "num_chars": 2}, {"sum_logits": -1.7533903121948242, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.7533903121948242, "logits_per_char": -0.8766951560974121, "num_chars": 2}, {"sum_logits": -1.5068284273147583, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5068284273147583, "logits_per_char": -0.7534142136573792, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 685, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0408273935317993, "incorrect_loss_raw": 1.594303051630656, "correct_loss_per_char": 0.5204136967658997, "incorrect_loss_per_char": 0.797151525815328, "correct_loss_per_token": 1.0408273935317993, "incorrect_loss_per_token": 1.594303051630656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0408273935317993, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -1.0408273935317993, "logits_per_char": -0.5204136967658997, "num_chars": 2}, {"sum_logits": -1.1793198585510254, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.1793198585510254, "logits_per_char": -0.5896599292755127, "num_chars": 2}, {"sum_logits": -1.8552470207214355, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.8552470207214355, "logits_per_char": -0.9276235103607178, "num_chars": 2}, {"sum_logits": -1.7483422756195068, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.7483422756195068, "logits_per_char": -0.8741711378097534, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 686, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4240623712539673, "incorrect_loss_raw": 1.408314863840739, "correct_loss_per_char": 0.7120311856269836, "incorrect_loss_per_char": 0.7041574319203695, "correct_loss_per_token": 1.4240623712539673, "incorrect_loss_per_token": 1.408314863840739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1405718326568604, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.1405718326568604, "logits_per_char": -0.5702859163284302, "num_chars": 2}, {"sum_logits": -1.4240623712539673, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4240623712539673, "logits_per_char": -0.7120311856269836, "num_chars": 2}, {"sum_logits": -1.6879371404647827, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6879371404647827, "logits_per_char": -0.8439685702323914, "num_chars": 2}, {"sum_logits": -1.3964356184005737, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.3964356184005737, "logits_per_char": -0.6982178092002869, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 687, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.277807354927063, "incorrect_loss_raw": 1.4463916619618733, "correct_loss_per_char": 0.6389036774635315, "incorrect_loss_per_char": 0.7231958309809366, "correct_loss_per_token": 1.277807354927063, "incorrect_loss_per_token": 1.4463916619618733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.277807354927063, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.277807354927063, "logits_per_char": -0.6389036774635315, "num_chars": 2}, {"sum_logits": -1.269134283065796, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -1.269134283065796, "logits_per_char": -0.634567141532898, "num_chars": 2}, {"sum_logits": -1.639687180519104, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.639687180519104, "logits_per_char": -0.819843590259552, "num_chars": 2}, {"sum_logits": -1.4303535223007202, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.4303535223007202, "logits_per_char": -0.7151767611503601, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 688, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5986049175262451, "incorrect_loss_raw": 1.4111957152684529, "correct_loss_per_char": 0.7993024587631226, "incorrect_loss_per_char": 0.7055978576342264, "correct_loss_per_token": 1.5986049175262451, "incorrect_loss_per_token": 1.4111957152684529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.096595287322998, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": true, "logits_per_token": -1.096595287322998, "logits_per_char": -0.548297643661499, "num_chars": 2}, {"sum_logits": -1.1511791944503784, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.1511791944503784, "logits_per_char": -0.5755895972251892, "num_chars": 2}, {"sum_logits": -1.9858126640319824, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.9858126640319824, "logits_per_char": -0.9929063320159912, "num_chars": 2}, {"sum_logits": -1.5986049175262451, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.5986049175262451, "logits_per_char": -0.7993024587631226, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 689, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2345123291015625, "incorrect_loss_raw": 1.5710036158561707, "correct_loss_per_char": 0.6172561645507812, "incorrect_loss_per_char": 0.7855018079280853, "correct_loss_per_token": 1.2345123291015625, "incorrect_loss_per_token": 1.5710036158561707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9430559277534485, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": true, "logits_per_token": -0.9430559277534485, "logits_per_char": -0.47152796387672424, "num_chars": 2}, {"sum_logits": -1.2345123291015625, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": false, "logits_per_token": -1.2345123291015625, "logits_per_char": -0.6172561645507812, "num_chars": 2}, {"sum_logits": -2.085923433303833, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": false, "logits_per_token": -2.085923433303833, "logits_per_char": -1.0429617166519165, "num_chars": 2}, {"sum_logits": -1.6840314865112305, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": false, "logits_per_token": -1.6840314865112305, "logits_per_char": -0.8420157432556152, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 690, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2203806638717651, "incorrect_loss_raw": 1.5746740500132244, "correct_loss_per_char": 0.6101903319358826, "incorrect_loss_per_char": 0.7873370250066122, "correct_loss_per_token": 1.2203806638717651, "incorrect_loss_per_token": 1.5746740500132244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9291239976882935, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -0.9291239976882935, "logits_per_char": -0.46456199884414673, "num_chars": 2}, {"sum_logits": -1.2203806638717651, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.2203806638717651, "logits_per_char": -0.6101903319358826, "num_chars": 2}, {"sum_logits": -2.0206801891326904, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -2.0206801891326904, "logits_per_char": -1.0103400945663452, "num_chars": 2}, {"sum_logits": -1.774217963218689, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.774217963218689, "logits_per_char": -0.8871089816093445, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 691, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4281922578811646, "incorrect_loss_raw": 1.4195595184961955, "correct_loss_per_char": 0.7140961289405823, "incorrect_loss_per_char": 0.7097797592480978, "correct_loss_per_token": 1.4281922578811646, "incorrect_loss_per_token": 1.4195595184961955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1052677631378174, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.1052677631378174, "logits_per_char": -0.5526338815689087, "num_chars": 2}, {"sum_logits": -1.4281922578811646, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4281922578811646, "logits_per_char": -0.7140961289405823, "num_chars": 2}, {"sum_logits": -1.7834818363189697, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7834818363189697, "logits_per_char": -0.8917409181594849, "num_chars": 2}, {"sum_logits": -1.3699289560317993, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.3699289560317993, "logits_per_char": -0.6849644780158997, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 692, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5570197105407715, "incorrect_loss_raw": 1.4035220940907795, "correct_loss_per_char": 0.7785098552703857, "incorrect_loss_per_char": 0.7017610470453898, "correct_loss_per_token": 1.5570197105407715, "incorrect_loss_per_token": 1.4035220940907795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.072930097579956, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -1.072930097579956, "logits_per_char": -0.536465048789978, "num_chars": 2}, {"sum_logits": -1.2431586980819702, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.2431586980819702, "logits_per_char": -0.6215793490409851, "num_chars": 2}, {"sum_logits": -1.8944774866104126, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.8944774866104126, "logits_per_char": -0.9472387433052063, "num_chars": 2}, {"sum_logits": -1.5570197105407715, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.5570197105407715, "logits_per_char": -0.7785098552703857, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 693, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.627622127532959, "incorrect_loss_raw": 1.336234410603841, "correct_loss_per_char": 0.8138110637664795, "incorrect_loss_per_char": 0.6681172053019205, "correct_loss_per_token": 1.627622127532959, "incorrect_loss_per_token": 1.336234410603841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2373592853546143, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -1.2373592853546143, "logits_per_char": -0.6186796426773071, "num_chars": 2}, {"sum_logits": -1.2457062005996704, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.2457062005996704, "logits_per_char": -0.6228531002998352, "num_chars": 2}, {"sum_logits": -1.627622127532959, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.627622127532959, "logits_per_char": -0.8138110637664795, "num_chars": 2}, {"sum_logits": -1.5256377458572388, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.5256377458572388, "logits_per_char": -0.7628188729286194, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 694, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.660873293876648, "incorrect_loss_raw": 1.3775584697723389, "correct_loss_per_char": 0.830436646938324, "incorrect_loss_per_char": 0.6887792348861694, "correct_loss_per_token": 1.660873293876648, "incorrect_loss_per_token": 1.3775584697723389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0800824165344238, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.0800824165344238, "logits_per_char": -0.5400412082672119, "num_chars": 2}, {"sum_logits": -1.1760509014129639, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.1760509014129639, "logits_per_char": -0.5880254507064819, "num_chars": 2}, {"sum_logits": -1.876542091369629, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.876542091369629, "logits_per_char": -0.9382710456848145, "num_chars": 2}, {"sum_logits": -1.660873293876648, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.660873293876648, "logits_per_char": -0.830436646938324, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 695, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7221641540527344, "incorrect_loss_raw": 1.3420753081639607, "correct_loss_per_char": 0.8610820770263672, "incorrect_loss_per_char": 0.6710376540819804, "correct_loss_per_token": 1.7221641540527344, "incorrect_loss_per_token": 1.3420753081639607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0534532070159912, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -1.0534532070159912, "logits_per_char": -0.5267266035079956, "num_chars": 2}, {"sum_logits": -1.2536096572875977, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.2536096572875977, "logits_per_char": -0.6268048286437988, "num_chars": 2}, {"sum_logits": -1.7191630601882935, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.7191630601882935, "logits_per_char": -0.8595815300941467, "num_chars": 2}, {"sum_logits": -1.7221641540527344, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.7221641540527344, "logits_per_char": -0.8610820770263672, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 696, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.516849160194397, "incorrect_loss_raw": 1.3915934960047405, "correct_loss_per_char": 0.7584245800971985, "incorrect_loss_per_char": 0.6957967480023702, "correct_loss_per_token": 1.516849160194397, "incorrect_loss_per_token": 1.3915934960047405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2301737070083618, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.2301737070083618, "logits_per_char": -0.6150868535041809, "num_chars": 2}, {"sum_logits": -1.172312617301941, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -1.172312617301941, "logits_per_char": -0.5861563086509705, "num_chars": 2}, {"sum_logits": -1.7722941637039185, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.7722941637039185, "logits_per_char": -0.8861470818519592, "num_chars": 2}, {"sum_logits": -1.516849160194397, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.516849160194397, "logits_per_char": -0.7584245800971985, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 697, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2312507629394531, "incorrect_loss_raw": 1.6136549512545268, "correct_loss_per_char": 0.6156253814697266, "incorrect_loss_per_char": 0.8068274756272634, "correct_loss_per_token": 1.2312507629394531, "incorrect_loss_per_token": 1.6136549512545268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8729947209358215, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": true, "logits_per_token": -0.8729947209358215, "logits_per_char": -0.43649736046791077, "num_chars": 2}, {"sum_logits": -1.2312507629394531, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.2312507629394531, "logits_per_char": -0.6156253814697266, "num_chars": 2}, {"sum_logits": -2.1637096405029297, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -2.1637096405029297, "logits_per_char": -1.0818548202514648, "num_chars": 2}, {"sum_logits": -1.804260492324829, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.804260492324829, "logits_per_char": -0.9021302461624146, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 698, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.031325578689575, "incorrect_loss_raw": 1.279683033625285, "correct_loss_per_char": 1.0156627893447876, "incorrect_loss_per_char": 0.6398415168126425, "correct_loss_per_token": 2.031325578689575, "incorrect_loss_per_token": 1.279683033625285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0261828899383545, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -1.0261828899383545, "logits_per_char": -0.5130914449691772, "num_chars": 2}, {"sum_logits": -1.2023751735687256, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.2023751735687256, "logits_per_char": -0.6011875867843628, "num_chars": 2}, {"sum_logits": -2.031325578689575, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -2.031325578689575, "logits_per_char": -1.0156627893447876, "num_chars": 2}, {"sum_logits": -1.6104910373687744, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.6104910373687744, "logits_per_char": -0.8052455186843872, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 699, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.269995093345642, "incorrect_loss_raw": 1.444178303082784, "correct_loss_per_char": 0.634997546672821, "incorrect_loss_per_char": 0.722089151541392, "correct_loss_per_token": 1.269995093345642, "incorrect_loss_per_token": 1.444178303082784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.269995093345642, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -1.269995093345642, "logits_per_char": -0.634997546672821, "num_chars": 2}, {"sum_logits": -1.3226817846298218, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.3226817846298218, "logits_per_char": -0.6613408923149109, "num_chars": 2}, {"sum_logits": -1.6084771156311035, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.6084771156311035, "logits_per_char": -0.8042385578155518, "num_chars": 2}, {"sum_logits": -1.4013760089874268, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.4013760089874268, "logits_per_char": -0.7006880044937134, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 700, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1902179718017578, "incorrect_loss_raw": 1.5385937293370564, "correct_loss_per_char": 0.5951089859008789, "incorrect_loss_per_char": 0.7692968646685282, "correct_loss_per_token": 1.1902179718017578, "incorrect_loss_per_token": 1.5385937293370564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.064663290977478, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.064663290977478, "logits_per_char": -0.532331645488739, "num_chars": 2}, {"sum_logits": -1.1902179718017578, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.1902179718017578, "logits_per_char": -0.5951089859008789, "num_chars": 2}, {"sum_logits": -1.925267219543457, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.925267219543457, "logits_per_char": -0.9626336097717285, "num_chars": 2}, {"sum_logits": -1.6258506774902344, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.6258506774902344, "logits_per_char": -0.8129253387451172, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 701, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.37328040599823, "incorrect_loss_raw": 1.414556860923767, "correct_loss_per_char": 0.686640202999115, "incorrect_loss_per_char": 0.7072784304618835, "correct_loss_per_token": 1.37328040599823, "incorrect_loss_per_token": 1.414556860923767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3559445142745972, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.3559445142745972, "logits_per_char": -0.6779722571372986, "num_chars": 2}, {"sum_logits": -1.271575927734375, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -1.271575927734375, "logits_per_char": -0.6357879638671875, "num_chars": 2}, {"sum_logits": -1.616150140762329, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.616150140762329, "logits_per_char": -0.8080750703811646, "num_chars": 2}, {"sum_logits": -1.37328040599823, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.37328040599823, "logits_per_char": -0.686640202999115, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 702, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.868091106414795, "incorrect_loss_raw": 1.2859208981196086, "correct_loss_per_char": 0.9340455532073975, "incorrect_loss_per_char": 0.6429604490598043, "correct_loss_per_token": 1.868091106414795, "incorrect_loss_per_token": 1.2859208981196086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1416651010513306, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.1416651010513306, "logits_per_char": -0.5708325505256653, "num_chars": 2}, {"sum_logits": -1.2264738082885742, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2264738082885742, "logits_per_char": -0.6132369041442871, "num_chars": 2}, {"sum_logits": -1.868091106414795, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.868091106414795, "logits_per_char": -0.9340455532073975, "num_chars": 2}, {"sum_logits": -1.489623785018921, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.489623785018921, "logits_per_char": -0.7448118925094604, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 703, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8949174880981445, "incorrect_loss_raw": 1.2929815451304119, "correct_loss_per_char": 0.9474587440490723, "incorrect_loss_per_char": 0.6464907725652059, "correct_loss_per_token": 1.8949174880981445, "incorrect_loss_per_token": 1.2929815451304119, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0862910747528076, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.0862910747528076, "logits_per_char": -0.5431455373764038, "num_chars": 2}, {"sum_logits": -1.2056596279144287, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.2056596279144287, "logits_per_char": -0.6028298139572144, "num_chars": 2}, {"sum_logits": -1.8949174880981445, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.8949174880981445, "logits_per_char": -0.9474587440490723, "num_chars": 2}, {"sum_logits": -1.586993932723999, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.586993932723999, "logits_per_char": -0.7934969663619995, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 704, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0616357326507568, "incorrect_loss_raw": 1.578810453414917, "correct_loss_per_char": 0.5308178663253784, "incorrect_loss_per_char": 0.7894052267074585, "correct_loss_per_token": 1.0616357326507568, "incorrect_loss_per_token": 1.578810453414917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0616357326507568, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.0616357326507568, "logits_per_char": -0.5308178663253784, "num_chars": 2}, {"sum_logits": -1.1930369138717651, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1930369138717651, "logits_per_char": -0.5965184569358826, "num_chars": 2}, {"sum_logits": -1.9059439897537231, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.9059439897537231, "logits_per_char": -0.9529719948768616, "num_chars": 2}, {"sum_logits": -1.6374504566192627, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.6374504566192627, "logits_per_char": -0.8187252283096313, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 705, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8389763832092285, "incorrect_loss_raw": 1.300181269645691, "correct_loss_per_char": 0.9194881916046143, "incorrect_loss_per_char": 0.6500906348228455, "correct_loss_per_token": 1.8389763832092285, "incorrect_loss_per_token": 1.300181269645691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.127483606338501, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -1.127483606338501, "logits_per_char": -0.5637418031692505, "num_chars": 2}, {"sum_logits": -1.2034803628921509, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.2034803628921509, "logits_per_char": -0.6017401814460754, "num_chars": 2}, {"sum_logits": -1.8389763832092285, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.8389763832092285, "logits_per_char": -0.9194881916046143, "num_chars": 2}, {"sum_logits": -1.569579839706421, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.569579839706421, "logits_per_char": -0.7847899198532104, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 706, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.74995756149292, "incorrect_loss_raw": 1.324268142382304, "correct_loss_per_char": 0.87497878074646, "incorrect_loss_per_char": 0.662134071191152, "correct_loss_per_token": 1.74995756149292, "incorrect_loss_per_token": 1.324268142382304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0796735286712646, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.0796735286712646, "logits_per_char": -0.5398367643356323, "num_chars": 2}, {"sum_logits": -1.3016200065612793, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.3016200065612793, "logits_per_char": -0.6508100032806396, "num_chars": 2}, {"sum_logits": -1.74995756149292, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.74995756149292, "logits_per_char": -0.87497878074646, "num_chars": 2}, {"sum_logits": -1.5915108919143677, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5915108919143677, "logits_per_char": -0.7957554459571838, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 707, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0520715713500977, "incorrect_loss_raw": 1.5436593691507976, "correct_loss_per_char": 0.5260357856750488, "incorrect_loss_per_char": 0.7718296845753988, "correct_loss_per_token": 1.0520715713500977, "incorrect_loss_per_token": 1.5436593691507976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0520715713500977, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.0520715713500977, "logits_per_char": -0.5260357856750488, "num_chars": 2}, {"sum_logits": -1.4676042795181274, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4676042795181274, "logits_per_char": -0.7338021397590637, "num_chars": 2}, {"sum_logits": -1.6856437921524048, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6856437921524048, "logits_per_char": -0.8428218960762024, "num_chars": 2}, {"sum_logits": -1.4777300357818604, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4777300357818604, "logits_per_char": -0.7388650178909302, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 708, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2443625926971436, "incorrect_loss_raw": 1.5409106016159058, "correct_loss_per_char": 0.6221812963485718, "incorrect_loss_per_char": 0.7704553008079529, "correct_loss_per_token": 1.2443625926971436, "incorrect_loss_per_token": 1.5409106016159058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9745756387710571, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.9745756387710571, "logits_per_char": -0.48728781938552856, "num_chars": 2}, {"sum_logits": -1.2443625926971436, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.2443625926971436, "logits_per_char": -0.6221812963485718, "num_chars": 2}, {"sum_logits": -1.9305461645126343, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.9305461645126343, "logits_per_char": -0.9652730822563171, "num_chars": 2}, {"sum_logits": -1.7176100015640259, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.7176100015640259, "logits_per_char": -0.8588050007820129, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 709, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3064243793487549, "incorrect_loss_raw": 1.547404905160268, "correct_loss_per_char": 0.6532121896743774, "incorrect_loss_per_char": 0.773702452580134, "correct_loss_per_token": 1.3064243793487549, "incorrect_loss_per_token": 1.547404905160268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9125751852989197, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -0.9125751852989197, "logits_per_char": -0.45628759264945984, "num_chars": 2}, {"sum_logits": -1.3064243793487549, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.3064243793487549, "logits_per_char": -0.6532121896743774, "num_chars": 2}, {"sum_logits": -2.1112723350524902, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -2.1112723350524902, "logits_per_char": -1.0556361675262451, "num_chars": 2}, {"sum_logits": -1.6183671951293945, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.6183671951293945, "logits_per_char": -0.8091835975646973, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 710, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0629806518554688, "incorrect_loss_raw": 1.3045862913131714, "correct_loss_per_char": 1.0314903259277344, "incorrect_loss_per_char": 0.6522931456565857, "correct_loss_per_token": 2.0629806518554688, "incorrect_loss_per_token": 1.3045862913131714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9615634679794312, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.9615634679794312, "logits_per_char": -0.4807817339897156, "num_chars": 2}, {"sum_logits": -1.148653268814087, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.148653268814087, "logits_per_char": -0.5743266344070435, "num_chars": 2}, {"sum_logits": -2.0629806518554688, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -2.0629806518554688, "logits_per_char": -1.0314903259277344, "num_chars": 2}, {"sum_logits": -1.803542137145996, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.803542137145996, "logits_per_char": -0.901771068572998, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 711, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1327897310256958, "incorrect_loss_raw": 1.5226654211680095, "correct_loss_per_char": 0.5663948655128479, "incorrect_loss_per_char": 0.7613327105840048, "correct_loss_per_token": 1.1327897310256958, "incorrect_loss_per_token": 1.5226654211680095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1327897310256958, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -1.1327897310256958, "logits_per_char": -0.5663948655128479, "num_chars": 2}, {"sum_logits": -1.246981143951416, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.246981143951416, "logits_per_char": -0.623490571975708, "num_chars": 2}, {"sum_logits": -1.7761855125427246, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.7761855125427246, "logits_per_char": -0.8880927562713623, "num_chars": 2}, {"sum_logits": -1.5448296070098877, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.5448296070098877, "logits_per_char": -0.7724148035049438, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 712, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.871253252029419, "incorrect_loss_raw": 1.314399003982544, "correct_loss_per_char": 0.9356266260147095, "incorrect_loss_per_char": 0.657199501991272, "correct_loss_per_token": 1.871253252029419, "incorrect_loss_per_token": 1.314399003982544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0015323162078857, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.0015323162078857, "logits_per_char": -0.5007661581039429, "num_chars": 2}, {"sum_logits": -1.2622759342193604, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2622759342193604, "logits_per_char": -0.6311379671096802, "num_chars": 2}, {"sum_logits": -1.871253252029419, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.871253252029419, "logits_per_char": -0.9356266260147095, "num_chars": 2}, {"sum_logits": -1.6793887615203857, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.6793887615203857, "logits_per_char": -0.8396943807601929, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 713, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5018147230148315, "incorrect_loss_raw": 1.391751726468404, "correct_loss_per_char": 0.7509073615074158, "incorrect_loss_per_char": 0.695875863234202, "correct_loss_per_token": 1.5018147230148315, "incorrect_loss_per_token": 1.391751726468404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.079681396484375, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.079681396484375, "logits_per_char": -0.5398406982421875, "num_chars": 2}, {"sum_logits": -1.5018147230148315, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5018147230148315, "logits_per_char": -0.7509073615074158, "num_chars": 2}, {"sum_logits": -1.7030881643295288, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.7030881643295288, "logits_per_char": -0.8515440821647644, "num_chars": 2}, {"sum_logits": -1.3924856185913086, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.3924856185913086, "logits_per_char": -0.6962428092956543, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 714, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0944807529449463, "incorrect_loss_raw": 1.5663890441258748, "correct_loss_per_char": 0.5472403764724731, "incorrect_loss_per_char": 0.7831945220629374, "correct_loss_per_token": 1.0944807529449463, "incorrect_loss_per_token": 1.5663890441258748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0944807529449463, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.0944807529449463, "logits_per_char": -0.5472403764724731, "num_chars": 2}, {"sum_logits": -1.1803580522537231, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.1803580522537231, "logits_per_char": -0.5901790261268616, "num_chars": 2}, {"sum_logits": -1.9581674337387085, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.9581674337387085, "logits_per_char": -0.9790837168693542, "num_chars": 2}, {"sum_logits": -1.5606416463851929, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.5606416463851929, "logits_per_char": -0.7803208231925964, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 715, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1252402067184448, "incorrect_loss_raw": 1.5257742007573445, "correct_loss_per_char": 0.5626201033592224, "incorrect_loss_per_char": 0.7628871003786722, "correct_loss_per_token": 1.1252402067184448, "incorrect_loss_per_token": 1.5257742007573445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1252402067184448, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -1.1252402067184448, "logits_per_char": -0.5626201033592224, "num_chars": 2}, {"sum_logits": -1.2639284133911133, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.2639284133911133, "logits_per_char": -0.6319642066955566, "num_chars": 2}, {"sum_logits": -1.797935962677002, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.797935962677002, "logits_per_char": -0.898967981338501, "num_chars": 2}, {"sum_logits": -1.5154582262039185, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.5154582262039185, "logits_per_char": -0.7577291131019592, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 716, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8236846923828125, "incorrect_loss_raw": 1.3106101751327515, "correct_loss_per_char": 0.9118423461914062, "incorrect_loss_per_char": 0.6553050875663757, "correct_loss_per_token": 1.8236846923828125, "incorrect_loss_per_token": 1.3106101751327515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0568641424179077, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.0568641424179077, "logits_per_char": -0.5284320712089539, "num_chars": 2}, {"sum_logits": -1.2768182754516602, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2768182754516602, "logits_per_char": -0.6384091377258301, "num_chars": 2}, {"sum_logits": -1.8236846923828125, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.8236846923828125, "logits_per_char": -0.9118423461914062, "num_chars": 2}, {"sum_logits": -1.5981481075286865, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5981481075286865, "logits_per_char": -0.7990740537643433, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 717, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2305681705474854, "incorrect_loss_raw": 1.4678887526194255, "correct_loss_per_char": 0.6152840852737427, "incorrect_loss_per_char": 0.7339443763097128, "correct_loss_per_token": 1.2305681705474854, "incorrect_loss_per_token": 1.4678887526194255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.280191421508789, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.280191421508789, "logits_per_char": -0.6400957107543945, "num_chars": 2}, {"sum_logits": -1.2305681705474854, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -1.2305681705474854, "logits_per_char": -0.6152840852737427, "num_chars": 2}, {"sum_logits": -1.5581716299057007, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.5581716299057007, "logits_per_char": -0.7790858149528503, "num_chars": 2}, {"sum_logits": -1.5653032064437866, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.5653032064437866, "logits_per_char": -0.7826516032218933, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 718, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7201155424118042, "incorrect_loss_raw": 1.320652683575948, "correct_loss_per_char": 0.8600577712059021, "incorrect_loss_per_char": 0.660326341787974, "correct_loss_per_token": 1.7201155424118042, "incorrect_loss_per_token": 1.320652683575948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.139980673789978, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.139980673789978, "logits_per_char": -0.569990336894989, "num_chars": 2}, {"sum_logits": -1.2526915073394775, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.2526915073394775, "logits_per_char": -0.6263457536697388, "num_chars": 2}, {"sum_logits": -1.7201155424118042, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.7201155424118042, "logits_per_char": -0.8600577712059021, "num_chars": 2}, {"sum_logits": -1.5692858695983887, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.5692858695983887, "logits_per_char": -0.7846429347991943, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 719, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1582539081573486, "incorrect_loss_raw": 1.5147997538248699, "correct_loss_per_char": 0.5791269540786743, "incorrect_loss_per_char": 0.7573998769124349, "correct_loss_per_token": 1.1582539081573486, "incorrect_loss_per_token": 1.5147997538248699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1582539081573486, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -1.1582539081573486, "logits_per_char": -0.5791269540786743, "num_chars": 2}, {"sum_logits": -1.2623182535171509, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.2623182535171509, "logits_per_char": -0.6311591267585754, "num_chars": 2}, {"sum_logits": -1.8326951265335083, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.8326951265335083, "logits_per_char": -0.9163475632667542, "num_chars": 2}, {"sum_logits": -1.4493858814239502, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.4493858814239502, "logits_per_char": -0.7246929407119751, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 720, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5892373323440552, "incorrect_loss_raw": 1.36392080783844, "correct_loss_per_char": 0.7946186661720276, "incorrect_loss_per_char": 0.68196040391922, "correct_loss_per_token": 1.5892373323440552, "incorrect_loss_per_token": 1.36392080783844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2249248027801514, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.2249248027801514, "logits_per_char": -0.6124624013900757, "num_chars": 2}, {"sum_logits": -1.170819878578186, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": true, "logits_per_token": -1.170819878578186, "logits_per_char": -0.585409939289093, "num_chars": 2}, {"sum_logits": -1.6960177421569824, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.6960177421569824, "logits_per_char": -0.8480088710784912, "num_chars": 2}, {"sum_logits": -1.5892373323440552, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.5892373323440552, "logits_per_char": -0.7946186661720276, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 721, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1625360250473022, "incorrect_loss_raw": 1.5520347356796265, "correct_loss_per_char": 0.5812680125236511, "incorrect_loss_per_char": 0.7760173678398132, "correct_loss_per_token": 1.1625360250473022, "incorrect_loss_per_token": 1.5520347356796265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1273555755615234, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": true, "logits_per_token": -1.1273555755615234, "logits_per_char": -0.5636777877807617, "num_chars": 2}, {"sum_logits": -1.1625360250473022, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -1.1625360250473022, "logits_per_char": -0.5812680125236511, "num_chars": 2}, {"sum_logits": -2.03755521774292, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -2.03755521774292, "logits_per_char": -1.01877760887146, "num_chars": 2}, {"sum_logits": -1.491193413734436, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -1.491193413734436, "logits_per_char": -0.745596706867218, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 722, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.542054295539856, "incorrect_loss_raw": 1.4096213579177856, "correct_loss_per_char": 0.771027147769928, "incorrect_loss_per_char": 0.7048106789588928, "correct_loss_per_token": 1.542054295539856, "incorrect_loss_per_token": 1.4096213579177856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1259996891021729, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1259996891021729, "logits_per_char": -0.5629998445510864, "num_chars": 2}, {"sum_logits": -1.1735432147979736, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1735432147979736, "logits_per_char": -0.5867716073989868, "num_chars": 2}, {"sum_logits": -1.9293211698532104, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.9293211698532104, "logits_per_char": -0.9646605849266052, "num_chars": 2}, {"sum_logits": -1.542054295539856, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.542054295539856, "logits_per_char": -0.771027147769928, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 723, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8465180397033691, "incorrect_loss_raw": 1.2924071947733562, "correct_loss_per_char": 0.9232590198516846, "incorrect_loss_per_char": 0.6462035973866781, "correct_loss_per_token": 1.8465180397033691, "incorrect_loss_per_token": 1.2924071947733562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1747477054595947, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.1747477054595947, "logits_per_char": -0.5873738527297974, "num_chars": 2}, {"sum_logits": -1.1831638813018799, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.1831638813018799, "logits_per_char": -0.5915819406509399, "num_chars": 2}, {"sum_logits": -1.8465180397033691, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.8465180397033691, "logits_per_char": -0.9232590198516846, "num_chars": 2}, {"sum_logits": -1.5193099975585938, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5193099975585938, "logits_per_char": -0.7596549987792969, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 724, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.018533706665039, "incorrect_loss_raw": 1.6162115335464478, "correct_loss_per_char": 0.5092668533325195, "incorrect_loss_per_char": 0.8081057667732239, "correct_loss_per_token": 1.018533706665039, "incorrect_loss_per_token": 1.6162115335464478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.018533706665039, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -1.018533706665039, "logits_per_char": -0.5092668533325195, "num_chars": 2}, {"sum_logits": -1.175270676612854, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.175270676612854, "logits_per_char": -0.587635338306427, "num_chars": 2}, {"sum_logits": -1.9833474159240723, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.9833474159240723, "logits_per_char": -0.9916737079620361, "num_chars": 2}, {"sum_logits": -1.690016508102417, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.690016508102417, "logits_per_char": -0.8450082540512085, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 725, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0376360416412354, "incorrect_loss_raw": 1.6947859128316243, "correct_loss_per_char": 0.5188180208206177, "incorrect_loss_per_char": 0.8473929564158121, "correct_loss_per_token": 1.0376360416412354, "incorrect_loss_per_token": 1.6947859128316243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9704616069793701, "num_tokens": 1, "num_tokens_all": 1123, "is_greedy": true, "logits_per_token": -0.9704616069793701, "logits_per_char": -0.48523080348968506, "num_chars": 2}, {"sum_logits": -1.0376360416412354, "num_tokens": 1, "num_tokens_all": 1123, "is_greedy": false, "logits_per_token": -1.0376360416412354, "logits_per_char": -0.5188180208206177, "num_chars": 2}, {"sum_logits": -2.2124459743499756, "num_tokens": 1, "num_tokens_all": 1123, "is_greedy": false, "logits_per_token": -2.2124459743499756, "logits_per_char": -1.1062229871749878, "num_chars": 2}, {"sum_logits": -1.9014501571655273, "num_tokens": 1, "num_tokens_all": 1123, "is_greedy": false, "logits_per_token": -1.9014501571655273, "logits_per_char": -0.9507250785827637, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 726, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9317119121551514, "incorrect_loss_raw": 1.6610252857208252, "correct_loss_per_char": 0.4658559560775757, "incorrect_loss_per_char": 0.8305126428604126, "correct_loss_per_token": 0.9317119121551514, "incorrect_loss_per_token": 1.6610252857208252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9317119121551514, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.9317119121551514, "logits_per_char": -0.4658559560775757, "num_chars": 2}, {"sum_logits": -1.263002872467041, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.263002872467041, "logits_per_char": -0.6315014362335205, "num_chars": 2}, {"sum_logits": -1.9116605520248413, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.9116605520248413, "logits_per_char": -0.9558302760124207, "num_chars": 2}, {"sum_logits": -1.8084124326705933, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.8084124326705933, "logits_per_char": -0.9042062163352966, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 727, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8227965831756592, "incorrect_loss_raw": 1.332189679145813, "correct_loss_per_char": 0.9113982915878296, "incorrect_loss_per_char": 0.6660948395729065, "correct_loss_per_token": 1.8227965831756592, "incorrect_loss_per_token": 1.332189679145813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9945229291915894, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -0.9945229291915894, "logits_per_char": -0.4972614645957947, "num_chars": 2}, {"sum_logits": -1.2519009113311768, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.2519009113311768, "logits_per_char": -0.6259504556655884, "num_chars": 2}, {"sum_logits": -1.7501451969146729, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.7501451969146729, "logits_per_char": -0.8750725984573364, "num_chars": 2}, {"sum_logits": -1.8227965831756592, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.8227965831756592, "logits_per_char": -0.9113982915878296, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 728, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8578033447265625, "incorrect_loss_raw": 1.3017884095509846, "correct_loss_per_char": 0.9289016723632812, "incorrect_loss_per_char": 0.6508942047754923, "correct_loss_per_token": 1.8578033447265625, "incorrect_loss_per_token": 1.3017884095509846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1129744052886963, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -1.1129744052886963, "logits_per_char": -0.5564872026443481, "num_chars": 2}, {"sum_logits": -1.1674541234970093, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.1674541234970093, "logits_per_char": -0.5837270617485046, "num_chars": 2}, {"sum_logits": -1.8578033447265625, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.8578033447265625, "logits_per_char": -0.9289016723632812, "num_chars": 2}, {"sum_logits": -1.6249366998672485, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.6249366998672485, "logits_per_char": -0.8124683499336243, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 729, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.455073356628418, "incorrect_loss_raw": 1.3873824278513591, "correct_loss_per_char": 0.727536678314209, "incorrect_loss_per_char": 0.6936912139256796, "correct_loss_per_token": 1.455073356628418, "incorrect_loss_per_token": 1.3873824278513591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1737905740737915, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.1737905740737915, "logits_per_char": -0.5868952870368958, "num_chars": 2}, {"sum_logits": -1.396217942237854, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.396217942237854, "logits_per_char": -0.698108971118927, "num_chars": 2}, {"sum_logits": -1.5921387672424316, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.5921387672424316, "logits_per_char": -0.7960693836212158, "num_chars": 2}, {"sum_logits": -1.455073356628418, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.455073356628418, "logits_per_char": -0.727536678314209, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 730, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7819641828536987, "incorrect_loss_raw": 1.3189103205998738, "correct_loss_per_char": 0.8909820914268494, "incorrect_loss_per_char": 0.6594551602999369, "correct_loss_per_token": 1.7819641828536987, "incorrect_loss_per_token": 1.3189103205998738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1093440055847168, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": true, "logits_per_token": -1.1093440055847168, "logits_per_char": -0.5546720027923584, "num_chars": 2}, {"sum_logits": -1.1891981363296509, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.1891981363296509, "logits_per_char": -0.5945990681648254, "num_chars": 2}, {"sum_logits": -1.7819641828536987, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.7819641828536987, "logits_per_char": -0.8909820914268494, "num_chars": 2}, {"sum_logits": -1.658188819885254, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.658188819885254, "logits_per_char": -0.829094409942627, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 731, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6896476745605469, "incorrect_loss_raw": 1.331650972366333, "correct_loss_per_char": 0.8448238372802734, "incorrect_loss_per_char": 0.6658254861831665, "correct_loss_per_token": 1.6896476745605469, "incorrect_loss_per_token": 1.331650972366333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0542399883270264, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -1.0542399883270264, "logits_per_char": -0.5271199941635132, "num_chars": 2}, {"sum_logits": -1.4398378133773804, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.4398378133773804, "logits_per_char": -0.7199189066886902, "num_chars": 2}, {"sum_logits": -1.6896476745605469, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6896476745605469, "logits_per_char": -0.8448238372802734, "num_chars": 2}, {"sum_logits": -1.5008751153945923, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.5008751153945923, "logits_per_char": -0.7504375576972961, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 732, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.196906566619873, "incorrect_loss_raw": 1.2894327243169148, "correct_loss_per_char": 1.0984532833099365, "incorrect_loss_per_char": 0.6447163621584574, "correct_loss_per_token": 2.196906566619873, "incorrect_loss_per_token": 1.2894327243169148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.90070641040802, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -0.90070641040802, "logits_per_char": -0.45035320520401, "num_chars": 2}, {"sum_logits": -1.1959303617477417, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.1959303617477417, "logits_per_char": -0.5979651808738708, "num_chars": 2}, {"sum_logits": -2.196906566619873, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -2.196906566619873, "logits_per_char": -1.0984532833099365, "num_chars": 2}, {"sum_logits": -1.771661400794983, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.771661400794983, "logits_per_char": -0.8858307003974915, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 733, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.032397985458374, "incorrect_loss_raw": 1.6279137929280598, "correct_loss_per_char": 0.516198992729187, "incorrect_loss_per_char": 0.8139568964640299, "correct_loss_per_token": 1.032397985458374, "incorrect_loss_per_token": 1.6279137929280598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.032397985458374, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": true, "logits_per_token": -1.032397985458374, "logits_per_char": -0.516198992729187, "num_chars": 2}, {"sum_logits": -1.130358338356018, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.130358338356018, "logits_per_char": -0.565179169178009, "num_chars": 2}, {"sum_logits": -2.045440673828125, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -2.045440673828125, "logits_per_char": -1.0227203369140625, "num_chars": 2}, {"sum_logits": -1.7079423666000366, "num_tokens": 1, "num_tokens_all": 1126, "is_greedy": false, "logits_per_token": -1.7079423666000366, "logits_per_char": -0.8539711833000183, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 734, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2807326316833496, "incorrect_loss_raw": 1.5266532103220622, "correct_loss_per_char": 0.6403663158416748, "incorrect_loss_per_char": 0.7633266051610311, "correct_loss_per_token": 1.2807326316833496, "incorrect_loss_per_token": 1.5266532103220622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9743452072143555, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -0.9743452072143555, "logits_per_char": -0.48717260360717773, "num_chars": 2}, {"sum_logits": -1.2807326316833496, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.2807326316833496, "logits_per_char": -0.6403663158416748, "num_chars": 2}, {"sum_logits": -1.9713075160980225, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.9713075160980225, "logits_per_char": -0.9856537580490112, "num_chars": 2}, {"sum_logits": -1.6343069076538086, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.6343069076538086, "logits_per_char": -0.8171534538269043, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 735, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1346551179885864, "incorrect_loss_raw": 1.5573252042134602, "correct_loss_per_char": 0.5673275589942932, "incorrect_loss_per_char": 0.7786626021067301, "correct_loss_per_token": 1.1346551179885864, "incorrect_loss_per_token": 1.5573252042134602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1346551179885864, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -1.1346551179885864, "logits_per_char": -0.5673275589942932, "num_chars": 2}, {"sum_logits": -1.1433813571929932, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.1433813571929932, "logits_per_char": -0.5716906785964966, "num_chars": 2}, {"sum_logits": -1.9797816276550293, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.9797816276550293, "logits_per_char": -0.9898908138275146, "num_chars": 2}, {"sum_logits": -1.5488126277923584, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.5488126277923584, "logits_per_char": -0.7744063138961792, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 736, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9867579936981201, "incorrect_loss_raw": 1.289882222811381, "correct_loss_per_char": 0.9933789968490601, "incorrect_loss_per_char": 0.6449411114056905, "correct_loss_per_token": 1.9867579936981201, "incorrect_loss_per_token": 1.289882222811381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0186249017715454, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0186249017715454, "logits_per_char": -0.5093124508857727, "num_chars": 2}, {"sum_logits": -1.2152481079101562, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2152481079101562, "logits_per_char": -0.6076240539550781, "num_chars": 2}, {"sum_logits": -1.9867579936981201, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.9867579936981201, "logits_per_char": -0.9933789968490601, "num_chars": 2}, {"sum_logits": -1.6357736587524414, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.6357736587524414, "logits_per_char": -0.8178868293762207, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 737, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7008310556411743, "incorrect_loss_raw": 1.3240787982940674, "correct_loss_per_char": 0.8504155278205872, "incorrect_loss_per_char": 0.6620393991470337, "correct_loss_per_token": 1.7008310556411743, "incorrect_loss_per_token": 1.3240787982940674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.112538456916809, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.112538456916809, "logits_per_char": -0.5562692284584045, "num_chars": 2}, {"sum_logits": -1.339605450630188, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.339605450630188, "logits_per_char": -0.669802725315094, "num_chars": 2}, {"sum_logits": -1.7008310556411743, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.7008310556411743, "logits_per_char": -0.8504155278205872, "num_chars": 2}, {"sum_logits": -1.520092487335205, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.520092487335205, "logits_per_char": -0.7600462436676025, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 738, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2832924127578735, "incorrect_loss_raw": 1.4542781909306843, "correct_loss_per_char": 0.6416462063789368, "incorrect_loss_per_char": 0.7271390954653422, "correct_loss_per_token": 1.2832924127578735, "incorrect_loss_per_token": 1.4542781909306843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2832924127578735, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.2832924127578735, "logits_per_char": -0.6416462063789368, "num_chars": 2}, {"sum_logits": -1.2222949266433716, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -1.2222949266433716, "logits_per_char": -0.6111474633216858, "num_chars": 2}, {"sum_logits": -1.7217614650726318, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.7217614650726318, "logits_per_char": -0.8608807325363159, "num_chars": 2}, {"sum_logits": -1.4187781810760498, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.4187781810760498, "logits_per_char": -0.7093890905380249, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 739, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.543587327003479, "incorrect_loss_raw": 1.357032299041748, "correct_loss_per_char": 0.7717936635017395, "incorrect_loss_per_char": 0.678516149520874, "correct_loss_per_token": 1.543587327003479, "incorrect_loss_per_token": 1.357032299041748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2530289888381958, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -1.2530289888381958, "logits_per_char": -0.6265144944190979, "num_chars": 2}, {"sum_logits": -1.278517484664917, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.278517484664917, "logits_per_char": -0.6392587423324585, "num_chars": 2}, {"sum_logits": -1.5395504236221313, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.5395504236221313, "logits_per_char": -0.7697752118110657, "num_chars": 2}, {"sum_logits": -1.543587327003479, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.543587327003479, "logits_per_char": -0.7717936635017395, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 740, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.381005883216858, "incorrect_loss_raw": 1.4150199095408122, "correct_loss_per_char": 0.690502941608429, "incorrect_loss_per_char": 0.7075099547704061, "correct_loss_per_token": 1.381005883216858, "incorrect_loss_per_token": 1.4150199095408122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1838583946228027, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1838583946228027, "logits_per_char": -0.5919291973114014, "num_chars": 2}, {"sum_logits": -1.4189183712005615, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4189183712005615, "logits_per_char": -0.7094591856002808, "num_chars": 2}, {"sum_logits": -1.6422829627990723, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6422829627990723, "logits_per_char": -0.8211414813995361, "num_chars": 2}, {"sum_logits": -1.381005883216858, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.381005883216858, "logits_per_char": -0.690502941608429, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 741, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5919009447097778, "incorrect_loss_raw": 1.3859581152598064, "correct_loss_per_char": 0.7959504723548889, "incorrect_loss_per_char": 0.6929790576299032, "correct_loss_per_token": 1.5919009447097778, "incorrect_loss_per_token": 1.3859581152598064, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.128004550933838, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -1.128004550933838, "logits_per_char": -0.564002275466919, "num_chars": 2}, {"sum_logits": -1.1746677160263062, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.1746677160263062, "logits_per_char": -0.5873338580131531, "num_chars": 2}, {"sum_logits": -1.855202078819275, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.855202078819275, "logits_per_char": -0.9276010394096375, "num_chars": 2}, {"sum_logits": -1.5919009447097778, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.5919009447097778, "logits_per_char": -0.7959504723548889, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 742, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0305792093276978, "incorrect_loss_raw": 1.5766944885253906, "correct_loss_per_char": 0.5152896046638489, "incorrect_loss_per_char": 0.7883472442626953, "correct_loss_per_token": 1.0305792093276978, "incorrect_loss_per_token": 1.5766944885253906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0305792093276978, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -1.0305792093276978, "logits_per_char": -0.5152896046638489, "num_chars": 2}, {"sum_logits": -1.310905933380127, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.310905933380127, "logits_per_char": -0.6554529666900635, "num_chars": 2}, {"sum_logits": -1.72407865524292, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.72407865524292, "logits_per_char": -0.86203932762146, "num_chars": 2}, {"sum_logits": -1.695098876953125, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.695098876953125, "logits_per_char": -0.8475494384765625, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 743, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2682886123657227, "incorrect_loss_raw": 1.4554297129313152, "correct_loss_per_char": 0.6341443061828613, "incorrect_loss_per_char": 0.7277148564656576, "correct_loss_per_token": 1.2682886123657227, "incorrect_loss_per_token": 1.4554297129313152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2682886123657227, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.2682886123657227, "logits_per_char": -0.6341443061828613, "num_chars": 2}, {"sum_logits": -1.2343876361846924, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -1.2343876361846924, "logits_per_char": -0.6171938180923462, "num_chars": 2}, {"sum_logits": -1.6401207447052002, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.6401207447052002, "logits_per_char": -0.8200603723526001, "num_chars": 2}, {"sum_logits": -1.4917807579040527, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.4917807579040527, "logits_per_char": -0.7458903789520264, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 744, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2121031284332275, "incorrect_loss_raw": 1.4722325801849365, "correct_loss_per_char": 0.6060515642166138, "incorrect_loss_per_char": 0.7361162900924683, "correct_loss_per_token": 1.2121031284332275, "incorrect_loss_per_token": 1.4722325801849365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2121031284332275, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -1.2121031284332275, "logits_per_char": -0.6060515642166138, "num_chars": 2}, {"sum_logits": -1.3044408559799194, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.3044408559799194, "logits_per_char": -0.6522204279899597, "num_chars": 2}, {"sum_logits": -1.6507714986801147, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.6507714986801147, "logits_per_char": -0.8253857493400574, "num_chars": 2}, {"sum_logits": -1.4614853858947754, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.4614853858947754, "logits_per_char": -0.7307426929473877, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 745, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4077579975128174, "incorrect_loss_raw": 1.4028526941935222, "correct_loss_per_char": 0.7038789987564087, "incorrect_loss_per_char": 0.7014263470967611, "correct_loss_per_token": 1.4077579975128174, "incorrect_loss_per_token": 1.4028526941935222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3146944046020508, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.3146944046020508, "logits_per_char": -0.6573472023010254, "num_chars": 2}, {"sum_logits": -1.2681952714920044, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -1.2681952714920044, "logits_per_char": -0.6340976357460022, "num_chars": 2}, {"sum_logits": -1.6256684064865112, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6256684064865112, "logits_per_char": -0.8128342032432556, "num_chars": 2}, {"sum_logits": -1.4077579975128174, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.4077579975128174, "logits_per_char": -0.7038789987564087, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 746, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1028642654418945, "incorrect_loss_raw": 1.5467190345128377, "correct_loss_per_char": 0.5514321327209473, "incorrect_loss_per_char": 0.7733595172564188, "correct_loss_per_token": 1.1028642654418945, "incorrect_loss_per_token": 1.5467190345128377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1028642654418945, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.1028642654418945, "logits_per_char": -0.5514321327209473, "num_chars": 2}, {"sum_logits": -1.2069199085235596, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.2069199085235596, "logits_per_char": -0.6034599542617798, "num_chars": 2}, {"sum_logits": -1.7790030241012573, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.7790030241012573, "logits_per_char": -0.8895015120506287, "num_chars": 2}, {"sum_logits": -1.6542341709136963, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.6542341709136963, "logits_per_char": -0.8271170854568481, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 747, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9882670640945435, "incorrect_loss_raw": 1.2894229491551716, "correct_loss_per_char": 0.9941335320472717, "incorrect_loss_per_char": 0.6447114745775858, "correct_loss_per_token": 1.9882670640945435, "incorrect_loss_per_token": 1.2894229491551716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0752428770065308, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": true, "logits_per_token": -1.0752428770065308, "logits_per_char": -0.5376214385032654, "num_chars": 2}, {"sum_logits": -1.134249210357666, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.134249210357666, "logits_per_char": -0.567124605178833, "num_chars": 2}, {"sum_logits": -1.9882670640945435, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.9882670640945435, "logits_per_char": -0.9941335320472717, "num_chars": 2}, {"sum_logits": -1.6587767601013184, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.6587767601013184, "logits_per_char": -0.8293883800506592, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 748, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1670160293579102, "incorrect_loss_raw": 1.5097775061925252, "correct_loss_per_char": 0.5835080146789551, "incorrect_loss_per_char": 0.7548887530962626, "correct_loss_per_token": 1.1670160293579102, "incorrect_loss_per_token": 1.5097775061925252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2231532335281372, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.2231532335281372, "logits_per_char": -0.6115766167640686, "num_chars": 2}, {"sum_logits": -1.1670160293579102, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -1.1670160293579102, "logits_per_char": -0.5835080146789551, "num_chars": 2}, {"sum_logits": -1.7655556201934814, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.7655556201934814, "logits_per_char": -0.8827778100967407, "num_chars": 2}, {"sum_logits": -1.540623664855957, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.540623664855957, "logits_per_char": -0.7703118324279785, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 749, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5497487783432007, "incorrect_loss_raw": 1.3579816420873005, "correct_loss_per_char": 0.7748743891716003, "incorrect_loss_per_char": 0.6789908210436503, "correct_loss_per_token": 1.5497487783432007, "incorrect_loss_per_token": 1.3579816420873005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2388564348220825, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -1.2388564348220825, "logits_per_char": -0.6194282174110413, "num_chars": 2}, {"sum_logits": -1.2651026248931885, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.2651026248931885, "logits_per_char": -0.6325513124465942, "num_chars": 2}, {"sum_logits": -1.5699858665466309, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.5699858665466309, "logits_per_char": -0.7849929332733154, "num_chars": 2}, {"sum_logits": -1.5497487783432007, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.5497487783432007, "logits_per_char": -0.7748743891716003, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 750, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.921433448791504, "incorrect_loss_raw": 1.3166853189468384, "correct_loss_per_char": 0.960716724395752, "incorrect_loss_per_char": 0.6583426594734192, "correct_loss_per_token": 1.921433448791504, "incorrect_loss_per_token": 1.3166853189468384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9762340784072876, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.9762340784072876, "logits_per_char": -0.4881170392036438, "num_chars": 2}, {"sum_logits": -1.2226402759552002, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.2226402759552002, "logits_per_char": -0.6113201379776001, "num_chars": 2}, {"sum_logits": -1.921433448791504, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.921433448791504, "logits_per_char": -0.960716724395752, "num_chars": 2}, {"sum_logits": -1.7511816024780273, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.7511816024780273, "logits_per_char": -0.8755908012390137, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 751, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9412746429443359, "incorrect_loss_raw": 1.6768471399943035, "correct_loss_per_char": 0.47063732147216797, "incorrect_loss_per_char": 0.8384235699971517, "correct_loss_per_token": 0.9412746429443359, "incorrect_loss_per_token": 1.6768471399943035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9412746429443359, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -0.9412746429443359, "logits_per_char": -0.47063732147216797, "num_chars": 2}, {"sum_logits": -1.210852026939392, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.210852026939392, "logits_per_char": -0.605426013469696, "num_chars": 2}, {"sum_logits": -2.067976474761963, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -2.067976474761963, "logits_per_char": -1.0339882373809814, "num_chars": 2}, {"sum_logits": -1.7517129182815552, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.7517129182815552, "logits_per_char": -0.8758564591407776, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 752, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8139944076538086, "incorrect_loss_raw": 1.3147345383961995, "correct_loss_per_char": 0.9069972038269043, "incorrect_loss_per_char": 0.6573672691980997, "correct_loss_per_token": 1.8139944076538086, "incorrect_loss_per_token": 1.3147345383961995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.060281753540039, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -1.060281753540039, "logits_per_char": -0.5301408767700195, "num_chars": 2}, {"sum_logits": -1.2540678977966309, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.2540678977966309, "logits_per_char": -0.6270339488983154, "num_chars": 2}, {"sum_logits": -1.8139944076538086, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.8139944076538086, "logits_per_char": -0.9069972038269043, "num_chars": 2}, {"sum_logits": -1.6298539638519287, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.6298539638519287, "logits_per_char": -0.8149269819259644, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 753, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2179863452911377, "incorrect_loss_raw": 1.5368642409642537, "correct_loss_per_char": 0.6089931726455688, "incorrect_loss_per_char": 0.7684321204821268, "correct_loss_per_token": 1.2179863452911377, "incorrect_loss_per_token": 1.5368642409642537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.031925916671753, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.031925916671753, "logits_per_char": -0.5159629583358765, "num_chars": 2}, {"sum_logits": -1.2179863452911377, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2179863452911377, "logits_per_char": -0.6089931726455688, "num_chars": 2}, {"sum_logits": -1.9473861455917358, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.9473861455917358, "logits_per_char": -0.9736930727958679, "num_chars": 2}, {"sum_logits": -1.6312806606292725, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.6312806606292725, "logits_per_char": -0.8156403303146362, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 754, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2770909070968628, "incorrect_loss_raw": 1.4455687999725342, "correct_loss_per_char": 0.6385454535484314, "incorrect_loss_per_char": 0.7227843999862671, "correct_loss_per_token": 1.2770909070968628, "incorrect_loss_per_token": 1.4455687999725342, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2763103246688843, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -1.2763103246688843, "logits_per_char": -0.6381551623344421, "num_chars": 2}, {"sum_logits": -1.2770909070968628, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.2770909070968628, "logits_per_char": -0.6385454535484314, "num_chars": 2}, {"sum_logits": -1.4761391878128052, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.4761391878128052, "logits_per_char": -0.7380695939064026, "num_chars": 2}, {"sum_logits": -1.584256887435913, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.584256887435913, "logits_per_char": -0.7921284437179565, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 755, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5715793371200562, "incorrect_loss_raw": 1.4200071493784587, "correct_loss_per_char": 0.7857896685600281, "incorrect_loss_per_char": 0.7100035746892294, "correct_loss_per_token": 1.5715793371200562, "incorrect_loss_per_token": 1.4200071493784587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0864858627319336, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0864858627319336, "logits_per_char": -0.5432429313659668, "num_chars": 2}, {"sum_logits": -1.1675951480865479, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1675951480865479, "logits_per_char": -0.5837975740432739, "num_chars": 2}, {"sum_logits": -2.0059404373168945, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -2.0059404373168945, "logits_per_char": -1.0029702186584473, "num_chars": 2}, {"sum_logits": -1.5715793371200562, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.5715793371200562, "logits_per_char": -0.7857896685600281, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 756, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0109009742736816, "incorrect_loss_raw": 1.6144402027130127, "correct_loss_per_char": 0.5054504871368408, "incorrect_loss_per_char": 0.8072201013565063, "correct_loss_per_token": 1.0109009742736816, "incorrect_loss_per_token": 1.6144402027130127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0109009742736816, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": true, "logits_per_token": -1.0109009742736816, "logits_per_char": -0.5054504871368408, "num_chars": 2}, {"sum_logits": -1.2329216003417969, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.2329216003417969, "logits_per_char": -0.6164608001708984, "num_chars": 2}, {"sum_logits": -1.978773593902588, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.978773593902588, "logits_per_char": -0.989386796951294, "num_chars": 2}, {"sum_logits": -1.6316254138946533, "num_tokens": 1, "num_tokens_all": 1109, "is_greedy": false, "logits_per_token": -1.6316254138946533, "logits_per_char": -0.8158127069473267, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 757, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8252023458480835, "incorrect_loss_raw": 1.3048296372095745, "correct_loss_per_char": 0.9126011729240417, "incorrect_loss_per_char": 0.6524148186047872, "correct_loss_per_token": 1.8252023458480835, "incorrect_loss_per_token": 1.3048296372095745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0652649402618408, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.0652649402618408, "logits_per_char": -0.5326324701309204, "num_chars": 2}, {"sum_logits": -1.2771213054656982, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2771213054656982, "logits_per_char": -0.6385606527328491, "num_chars": 2}, {"sum_logits": -1.8252023458480835, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8252023458480835, "logits_per_char": -0.9126011729240417, "num_chars": 2}, {"sum_logits": -1.572102665901184, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.572102665901184, "logits_per_char": -0.786051332950592, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 758, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7630836963653564, "incorrect_loss_raw": 1.2961256504058838, "correct_loss_per_char": 0.8815418481826782, "incorrect_loss_per_char": 0.6480628252029419, "correct_loss_per_token": 1.7630836963653564, "incorrect_loss_per_token": 1.2961256504058838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2125444412231445, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.2125444412231445, "logits_per_char": -0.6062722206115723, "num_chars": 2}, {"sum_logits": -1.3274542093276978, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.3274542093276978, "logits_per_char": -0.6637271046638489, "num_chars": 2}, {"sum_logits": -1.7630836963653564, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.7630836963653564, "logits_per_char": -0.8815418481826782, "num_chars": 2}, {"sum_logits": -1.348378300666809, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.348378300666809, "logits_per_char": -0.6741891503334045, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 759, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4790434837341309, "incorrect_loss_raw": 1.429850180943807, "correct_loss_per_char": 0.7395217418670654, "incorrect_loss_per_char": 0.7149250904719034, "correct_loss_per_token": 1.4790434837341309, "incorrect_loss_per_token": 1.429850180943807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.112349510192871, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.112349510192871, "logits_per_char": -0.5561747550964355, "num_chars": 2}, {"sum_logits": -1.229017734527588, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.229017734527588, "logits_per_char": -0.614508867263794, "num_chars": 2}, {"sum_logits": -1.948183298110962, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.948183298110962, "logits_per_char": -0.974091649055481, "num_chars": 2}, {"sum_logits": -1.4790434837341309, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.4790434837341309, "logits_per_char": -0.7395217418670654, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 760, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9778095483779907, "incorrect_loss_raw": 1.2873267730077107, "correct_loss_per_char": 0.9889047741889954, "incorrect_loss_per_char": 0.6436633865038554, "correct_loss_per_token": 1.9778095483779907, "incorrect_loss_per_token": 1.2873267730077107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0903679132461548, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.0903679132461548, "logits_per_char": -0.5451839566230774, "num_chars": 2}, {"sum_logits": -1.1406199932098389, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.1406199932098389, "logits_per_char": -0.5703099966049194, "num_chars": 2}, {"sum_logits": -1.9778095483779907, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9778095483779907, "logits_per_char": -0.9889047741889954, "num_chars": 2}, {"sum_logits": -1.6309924125671387, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.6309924125671387, "logits_per_char": -0.8154962062835693, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 761, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5552854537963867, "incorrect_loss_raw": 1.3745766878128052, "correct_loss_per_char": 0.7776427268981934, "incorrect_loss_per_char": 0.6872883439064026, "correct_loss_per_token": 1.5552854537963867, "incorrect_loss_per_token": 1.3745766878128052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.219505786895752, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.219505786895752, "logits_per_char": -0.609752893447876, "num_chars": 2}, {"sum_logits": -1.1818903684616089, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.1818903684616089, "logits_per_char": -0.5909451842308044, "num_chars": 2}, {"sum_logits": -1.7223339080810547, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.7223339080810547, "logits_per_char": -0.8611669540405273, "num_chars": 2}, {"sum_logits": -1.5552854537963867, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5552854537963867, "logits_per_char": -0.7776427268981934, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 762, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5419604778289795, "incorrect_loss_raw": 1.3907618522644043, "correct_loss_per_char": 0.7709802389144897, "incorrect_loss_per_char": 0.6953809261322021, "correct_loss_per_token": 1.5419604778289795, "incorrect_loss_per_token": 1.3907618522644043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1712439060211182, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.1712439060211182, "logits_per_char": -0.5856219530105591, "num_chars": 2}, {"sum_logits": -1.1999661922454834, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.1999661922454834, "logits_per_char": -0.5999830961227417, "num_chars": 2}, {"sum_logits": -1.8010754585266113, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.8010754585266113, "logits_per_char": -0.9005377292633057, "num_chars": 2}, {"sum_logits": -1.5419604778289795, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.5419604778289795, "logits_per_char": -0.7709802389144897, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 763, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6326855421066284, "incorrect_loss_raw": 1.3335203727086384, "correct_loss_per_char": 0.8163427710533142, "incorrect_loss_per_char": 0.6667601863543192, "correct_loss_per_token": 1.6326855421066284, "incorrect_loss_per_token": 1.3335203727086384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2392141819000244, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -1.2392141819000244, "logits_per_char": -0.6196070909500122, "num_chars": 2}, {"sum_logits": -1.2504006624221802, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.2504006624221802, "logits_per_char": -0.6252003312110901, "num_chars": 2}, {"sum_logits": -1.510946273803711, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.510946273803711, "logits_per_char": -0.7554731369018555, "num_chars": 2}, {"sum_logits": -1.6326855421066284, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6326855421066284, "logits_per_char": -0.8163427710533142, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 764, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5268536806106567, "incorrect_loss_raw": 1.390247901280721, "correct_loss_per_char": 0.7634268403053284, "incorrect_loss_per_char": 0.6951239506403605, "correct_loss_per_token": 1.5268536806106567, "incorrect_loss_per_token": 1.390247901280721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1350557804107666, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -1.1350557804107666, "logits_per_char": -0.5675278902053833, "num_chars": 2}, {"sum_logits": -1.2664786577224731, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.2664786577224731, "logits_per_char": -0.6332393288612366, "num_chars": 2}, {"sum_logits": -1.7692092657089233, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.7692092657089233, "logits_per_char": -0.8846046328544617, "num_chars": 2}, {"sum_logits": -1.5268536806106567, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.5268536806106567, "logits_per_char": -0.7634268403053284, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 765, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6841644048690796, "incorrect_loss_raw": 1.399039665857951, "correct_loss_per_char": 0.8420822024345398, "incorrect_loss_per_char": 0.6995198329289755, "correct_loss_per_token": 1.6841644048690796, "incorrect_loss_per_token": 1.399039665857951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0027306079864502, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0027306079864502, "logits_per_char": -0.5013653039932251, "num_chars": 2}, {"sum_logits": -1.2011165618896484, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2011165618896484, "logits_per_char": -0.6005582809448242, "num_chars": 2}, {"sum_logits": -1.993271827697754, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.993271827697754, "logits_per_char": -0.996635913848877, "num_chars": 2}, {"sum_logits": -1.6841644048690796, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.6841644048690796, "logits_per_char": -0.8420822024345398, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 766, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8470627069473267, "incorrect_loss_raw": 1.2853906551996868, "correct_loss_per_char": 0.9235313534736633, "incorrect_loss_per_char": 0.6426953275998434, "correct_loss_per_token": 1.8470627069473267, "incorrect_loss_per_token": 1.2853906551996868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2469052076339722, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.2469052076339722, "logits_per_char": -0.6234526038169861, "num_chars": 2}, {"sum_logits": -1.1738940477371216, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.1738940477371216, "logits_per_char": -0.5869470238685608, "num_chars": 2}, {"sum_logits": -1.8470627069473267, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.8470627069473267, "logits_per_char": -0.9235313534736633, "num_chars": 2}, {"sum_logits": -1.4353727102279663, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.4353727102279663, "logits_per_char": -0.7176863551139832, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 767, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6629109382629395, "incorrect_loss_raw": 1.3264717658360798, "correct_loss_per_char": 0.8314554691314697, "incorrect_loss_per_char": 0.6632358829180399, "correct_loss_per_token": 1.6629109382629395, "incorrect_loss_per_token": 1.3264717658360798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1316304206848145, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -1.1316304206848145, "logits_per_char": -0.5658152103424072, "num_chars": 2}, {"sum_logits": -1.4390041828155518, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.4390041828155518, "logits_per_char": -0.7195020914077759, "num_chars": 2}, {"sum_logits": -1.6629109382629395, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6629109382629395, "logits_per_char": -0.8314554691314697, "num_chars": 2}, {"sum_logits": -1.4087806940078735, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.4087806940078735, "logits_per_char": -0.7043903470039368, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 768, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7997698783874512, "incorrect_loss_raw": 1.3561111688613892, "correct_loss_per_char": 0.8998849391937256, "incorrect_loss_per_char": 0.6780555844306946, "correct_loss_per_token": 1.7997698783874512, "incorrect_loss_per_token": 1.3561111688613892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9649474620819092, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -0.9649474620819092, "logits_per_char": -0.4824737310409546, "num_chars": 2}, {"sum_logits": -1.2507115602493286, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.2507115602493286, "logits_per_char": -0.6253557801246643, "num_chars": 2}, {"sum_logits": -1.8526744842529297, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.8526744842529297, "logits_per_char": -0.9263372421264648, "num_chars": 2}, {"sum_logits": -1.7997698783874512, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.7997698783874512, "logits_per_char": -0.8998849391937256, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 769, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.726236343383789, "incorrect_loss_raw": 1.408272425333659, "correct_loss_per_char": 0.8631181716918945, "incorrect_loss_per_char": 0.7041362126668295, "correct_loss_per_token": 1.726236343383789, "incorrect_loss_per_token": 1.408272425333659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.090195894241333, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.090195894241333, "logits_per_char": -0.5450979471206665, "num_chars": 2}, {"sum_logits": -1.028594732284546, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": true, "logits_per_token": -1.028594732284546, "logits_per_char": -0.514297366142273, "num_chars": 2}, {"sum_logits": -2.1060266494750977, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -2.1060266494750977, "logits_per_char": -1.0530133247375488, "num_chars": 2}, {"sum_logits": -1.726236343383789, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.726236343383789, "logits_per_char": -0.8631181716918945, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 770, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.746442437171936, "incorrect_loss_raw": 1.3250763416290283, "correct_loss_per_char": 0.873221218585968, "incorrect_loss_per_char": 0.6625381708145142, "correct_loss_per_token": 1.746442437171936, "incorrect_loss_per_token": 1.3250763416290283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1318790912628174, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -1.1318790912628174, "logits_per_char": -0.5659395456314087, "num_chars": 2}, {"sum_logits": -1.2037333250045776, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.2037333250045776, "logits_per_char": -0.6018666625022888, "num_chars": 2}, {"sum_logits": -1.746442437171936, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.746442437171936, "logits_per_char": -0.873221218585968, "num_chars": 2}, {"sum_logits": -1.63961660861969, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.63961660861969, "logits_per_char": -0.819808304309845, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 771, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.22029709815979, "incorrect_loss_raw": 1.551333745320638, "correct_loss_per_char": 0.610148549079895, "incorrect_loss_per_char": 0.775666872660319, "correct_loss_per_token": 1.22029709815979, "incorrect_loss_per_token": 1.551333745320638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.997575044631958, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -0.997575044631958, "logits_per_char": -0.498787522315979, "num_chars": 2}, {"sum_logits": -1.22029709815979, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.22029709815979, "logits_per_char": -0.610148549079895, "num_chars": 2}, {"sum_logits": -2.0184059143066406, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -2.0184059143066406, "logits_per_char": -1.0092029571533203, "num_chars": 2}, {"sum_logits": -1.6380202770233154, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.6380202770233154, "logits_per_char": -0.8190101385116577, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 772, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8674615621566772, "incorrect_loss_raw": 1.3044921159744263, "correct_loss_per_char": 0.9337307810783386, "incorrect_loss_per_char": 0.6522460579872131, "correct_loss_per_token": 1.8674615621566772, "incorrect_loss_per_token": 1.3044921159744263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0828702449798584, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.0828702449798584, "logits_per_char": -0.5414351224899292, "num_chars": 2}, {"sum_logits": -1.1799179315567017, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.1799179315567017, "logits_per_char": -0.5899589657783508, "num_chars": 2}, {"sum_logits": -1.8674615621566772, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.8674615621566772, "logits_per_char": -0.9337307810783386, "num_chars": 2}, {"sum_logits": -1.6506881713867188, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.6506881713867188, "logits_per_char": -0.8253440856933594, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 773, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.481859564781189, "incorrect_loss_raw": 1.4274156093597412, "correct_loss_per_char": 0.7409297823905945, "incorrect_loss_per_char": 0.7137078046798706, "correct_loss_per_token": 1.481859564781189, "incorrect_loss_per_token": 1.4274156093597412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1028343439102173, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.1028343439102173, "logits_per_char": -0.5514171719551086, "num_chars": 2}, {"sum_logits": -1.2384496927261353, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.2384496927261353, "logits_per_char": -0.6192248463630676, "num_chars": 2}, {"sum_logits": -1.940962791442871, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.940962791442871, "logits_per_char": -0.9704813957214355, "num_chars": 2}, {"sum_logits": -1.481859564781189, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.481859564781189, "logits_per_char": -0.7409297823905945, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 774, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0887451171875, "incorrect_loss_raw": 1.5852202971776326, "correct_loss_per_char": 0.54437255859375, "incorrect_loss_per_char": 0.7926101485888163, "correct_loss_per_token": 1.0887451171875, "incorrect_loss_per_token": 1.5852202971776326, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1387321949005127, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.1387321949005127, "logits_per_char": -0.5693660974502563, "num_chars": 2}, {"sum_logits": -1.0887451171875, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.0887451171875, "logits_per_char": -0.54437255859375, "num_chars": 2}, {"sum_logits": -2.022606372833252, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -2.022606372833252, "logits_per_char": -1.011303186416626, "num_chars": 2}, {"sum_logits": -1.5943223237991333, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.5943223237991333, "logits_per_char": -0.7971611618995667, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 775, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6068120002746582, "incorrect_loss_raw": 1.3781832456588745, "correct_loss_per_char": 0.8034060001373291, "incorrect_loss_per_char": 0.6890916228294373, "correct_loss_per_token": 1.6068120002746582, "incorrect_loss_per_token": 1.3781832456588745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1446486711502075, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.1446486711502075, "logits_per_char": -0.5723243355751038, "num_chars": 2}, {"sum_logits": -1.1676998138427734, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.1676998138427734, "logits_per_char": -0.5838499069213867, "num_chars": 2}, {"sum_logits": -1.8222012519836426, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.8222012519836426, "logits_per_char": -0.9111006259918213, "num_chars": 2}, {"sum_logits": -1.6068120002746582, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.6068120002746582, "logits_per_char": -0.8034060001373291, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 776, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.191587209701538, "incorrect_loss_raw": 1.6220826705296834, "correct_loss_per_char": 0.595793604850769, "incorrect_loss_per_char": 0.8110413352648417, "correct_loss_per_token": 1.191587209701538, "incorrect_loss_per_token": 1.6220826705296834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9009742736816406, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -0.9009742736816406, "logits_per_char": -0.4504871368408203, "num_chars": 2}, {"sum_logits": -1.191587209701538, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.191587209701538, "logits_per_char": -0.595793604850769, "num_chars": 2}, {"sum_logits": -2.134075880050659, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -2.134075880050659, "logits_per_char": -1.0670379400253296, "num_chars": 2}, {"sum_logits": -1.8311978578567505, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.8311978578567505, "logits_per_char": -0.9155989289283752, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 777, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1687204837799072, "incorrect_loss_raw": 1.523441751797994, "correct_loss_per_char": 0.5843602418899536, "incorrect_loss_per_char": 0.761720875898997, "correct_loss_per_token": 1.1687204837799072, "incorrect_loss_per_token": 1.523441751797994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1687204837799072, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.1687204837799072, "logits_per_char": -0.5843602418899536, "num_chars": 2}, {"sum_logits": -1.1741105318069458, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.1741105318069458, "logits_per_char": -0.5870552659034729, "num_chars": 2}, {"sum_logits": -1.895496129989624, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.895496129989624, "logits_per_char": -0.947748064994812, "num_chars": 2}, {"sum_logits": -1.500718593597412, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.500718593597412, "logits_per_char": -0.750359296798706, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 778, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2916309833526611, "incorrect_loss_raw": 1.6048877239227295, "correct_loss_per_char": 0.6458154916763306, "incorrect_loss_per_char": 0.8024438619613647, "correct_loss_per_token": 1.2916309833526611, "incorrect_loss_per_token": 1.6048877239227295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8217401504516602, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.8217401504516602, "logits_per_char": -0.4108700752258301, "num_chars": 2}, {"sum_logits": -1.2916309833526611, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.2916309833526611, "logits_per_char": -0.6458154916763306, "num_chars": 2}, {"sum_logits": -2.1054880619049072, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.1054880619049072, "logits_per_char": -1.0527440309524536, "num_chars": 2}, {"sum_logits": -1.887434959411621, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.887434959411621, "logits_per_char": -0.9437174797058105, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 779, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1235790252685547, "incorrect_loss_raw": 1.6514700253804524, "correct_loss_per_char": 0.5617895126342773, "incorrect_loss_per_char": 0.8257350126902262, "correct_loss_per_token": 1.1235790252685547, "incorrect_loss_per_token": 1.6514700253804524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9432892799377441, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -0.9432892799377441, "logits_per_char": -0.47164463996887207, "num_chars": 2}, {"sum_logits": -1.1235790252685547, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.1235790252685547, "logits_per_char": -0.5617895126342773, "num_chars": 2}, {"sum_logits": -2.244441032409668, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -2.244441032409668, "logits_per_char": -1.122220516204834, "num_chars": 2}, {"sum_logits": -1.7666797637939453, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.7666797637939453, "logits_per_char": -0.8833398818969727, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 780, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1113274097442627, "incorrect_loss_raw": 1.5226096709569295, "correct_loss_per_char": 0.5556637048721313, "incorrect_loss_per_char": 0.7613048354784647, "correct_loss_per_token": 1.1113274097442627, "incorrect_loss_per_token": 1.5226096709569295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1113274097442627, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -1.1113274097442627, "logits_per_char": -0.5556637048721313, "num_chars": 2}, {"sum_logits": -1.3177739381790161, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3177739381790161, "logits_per_char": -0.6588869690895081, "num_chars": 2}, {"sum_logits": -1.7247849702835083, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.7247849702835083, "logits_per_char": -0.8623924851417542, "num_chars": 2}, {"sum_logits": -1.5252701044082642, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5252701044082642, "logits_per_char": -0.7626350522041321, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 781, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9736332893371582, "incorrect_loss_raw": 1.637437105178833, "correct_loss_per_char": 0.4868166446685791, "incorrect_loss_per_char": 0.8187185525894165, "correct_loss_per_token": 0.9736332893371582, "incorrect_loss_per_token": 1.637437105178833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9736332893371582, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -0.9736332893371582, "logits_per_char": -0.4868166446685791, "num_chars": 2}, {"sum_logits": -1.2101819515228271, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.2101819515228271, "logits_per_char": -0.6050909757614136, "num_chars": 2}, {"sum_logits": -1.9329984188079834, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.9329984188079834, "logits_per_char": -0.9664992094039917, "num_chars": 2}, {"sum_logits": -1.7691309452056885, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.7691309452056885, "logits_per_char": -0.8845654726028442, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 782, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4852076768875122, "incorrect_loss_raw": 1.391469677289327, "correct_loss_per_char": 0.7426038384437561, "incorrect_loss_per_char": 0.6957348386446635, "correct_loss_per_token": 1.4852076768875122, "incorrect_loss_per_token": 1.391469677289327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0856075286865234, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.0856075286865234, "logits_per_char": -0.5428037643432617, "num_chars": 2}, {"sum_logits": -1.4251047372817993, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4251047372817993, "logits_per_char": -0.7125523686408997, "num_chars": 2}, {"sum_logits": -1.6636967658996582, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.6636967658996582, "logits_per_char": -0.8318483829498291, "num_chars": 2}, {"sum_logits": -1.4852076768875122, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4852076768875122, "logits_per_char": -0.7426038384437561, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 783, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6449246406555176, "incorrect_loss_raw": 1.3991214632987976, "correct_loss_per_char": 0.8224623203277588, "incorrect_loss_per_char": 0.6995607316493988, "correct_loss_per_token": 1.6449246406555176, "incorrect_loss_per_token": 1.3991214632987976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9818688035011292, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -0.9818688035011292, "logits_per_char": -0.4909344017505646, "num_chars": 2}, {"sum_logits": -1.277084231376648, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.277084231376648, "logits_per_char": -0.638542115688324, "num_chars": 2}, {"sum_logits": -1.9384113550186157, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.9384113550186157, "logits_per_char": -0.9692056775093079, "num_chars": 2}, {"sum_logits": -1.6449246406555176, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.6449246406555176, "logits_per_char": -0.8224623203277588, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 784, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5897998809814453, "incorrect_loss_raw": 1.3887864748636882, "correct_loss_per_char": 0.7948999404907227, "incorrect_loss_per_char": 0.6943932374318441, "correct_loss_per_token": 1.5897998809814453, "incorrect_loss_per_token": 1.3887864748636882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.064824104309082, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.064824104309082, "logits_per_char": -0.532412052154541, "num_chars": 2}, {"sum_logits": -1.2580451965332031, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.2580451965332031, "logits_per_char": -0.6290225982666016, "num_chars": 2}, {"sum_logits": -1.8434901237487793, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.8434901237487793, "logits_per_char": -0.9217450618743896, "num_chars": 2}, {"sum_logits": -1.5897998809814453, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.5897998809814453, "logits_per_char": -0.7948999404907227, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 785, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.046749234199524, "incorrect_loss_raw": 1.6141211191813152, "correct_loss_per_char": 0.523374617099762, "incorrect_loss_per_char": 0.8070605595906576, "correct_loss_per_token": 1.046749234199524, "incorrect_loss_per_token": 1.6141211191813152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.046749234199524, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.046749234199524, "logits_per_char": -0.523374617099762, "num_chars": 2}, {"sum_logits": -1.1317387819290161, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1317387819290161, "logits_per_char": -0.5658693909645081, "num_chars": 2}, {"sum_logits": -2.0578317642211914, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -2.0578317642211914, "logits_per_char": -1.0289158821105957, "num_chars": 2}, {"sum_logits": -1.6527928113937378, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.6527928113937378, "logits_per_char": -0.8263964056968689, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 786, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.671641230583191, "incorrect_loss_raw": 1.3265273173650105, "correct_loss_per_char": 0.8358206152915955, "incorrect_loss_per_char": 0.6632636586825053, "correct_loss_per_token": 1.671641230583191, "incorrect_loss_per_token": 1.3265273173650105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3398475646972656, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.3398475646972656, "logits_per_char": -0.6699237823486328, "num_chars": 2}, {"sum_logits": -1.143922209739685, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -1.143922209739685, "logits_per_char": -0.5719611048698425, "num_chars": 2}, {"sum_logits": -1.671641230583191, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.671641230583191, "logits_per_char": -0.8358206152915955, "num_chars": 2}, {"sum_logits": -1.495812177658081, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.495812177658081, "logits_per_char": -0.7479060888290405, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 787, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4715710878372192, "incorrect_loss_raw": 1.3937604824701946, "correct_loss_per_char": 0.7357855439186096, "incorrect_loss_per_char": 0.6968802412350973, "correct_loss_per_token": 1.4715710878372192, "incorrect_loss_per_token": 1.3937604824701946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0931428670883179, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.0931428670883179, "logits_per_char": -0.5465714335441589, "num_chars": 2}, {"sum_logits": -1.4715710878372192, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4715710878372192, "logits_per_char": -0.7357855439186096, "num_chars": 2}, {"sum_logits": -1.6448091268539429, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.6448091268539429, "logits_per_char": -0.8224045634269714, "num_chars": 2}, {"sum_logits": -1.4433294534683228, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4433294534683228, "logits_per_char": -0.7216647267341614, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 788, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8063409328460693, "incorrect_loss_raw": 1.3068934679031372, "correct_loss_per_char": 0.9031704664230347, "incorrect_loss_per_char": 0.6534467339515686, "correct_loss_per_token": 1.8063409328460693, "incorrect_loss_per_token": 1.3068934679031372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0888326168060303, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -1.0888326168060303, "logits_per_char": -0.5444163084030151, "num_chars": 2}, {"sum_logits": -1.2593045234680176, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.2593045234680176, "logits_per_char": -0.6296522617340088, "num_chars": 2}, {"sum_logits": -1.8063409328460693, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.8063409328460693, "logits_per_char": -0.9031704664230347, "num_chars": 2}, {"sum_logits": -1.5725432634353638, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.5725432634353638, "logits_per_char": -0.7862716317176819, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 789, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.876305103302002, "incorrect_loss_raw": 1.29365070660909, "correct_loss_per_char": 0.938152551651001, "incorrect_loss_per_char": 0.646825353304545, "correct_loss_per_token": 1.876305103302002, "incorrect_loss_per_token": 1.29365070660909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1202337741851807, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -1.1202337741851807, "logits_per_char": -0.5601168870925903, "num_chars": 2}, {"sum_logits": -1.1840391159057617, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.1840391159057617, "logits_per_char": -0.5920195579528809, "num_chars": 2}, {"sum_logits": -1.876305103302002, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.876305103302002, "logits_per_char": -0.938152551651001, "num_chars": 2}, {"sum_logits": -1.5766792297363281, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.5766792297363281, "logits_per_char": -0.7883396148681641, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 790, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7900201082229614, "incorrect_loss_raw": 1.3136155207951863, "correct_loss_per_char": 0.8950100541114807, "incorrect_loss_per_char": 0.6568077603975931, "correct_loss_per_token": 1.7900201082229614, "incorrect_loss_per_token": 1.3136155207951863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1395941972732544, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.1395941972732544, "logits_per_char": -0.5697970986366272, "num_chars": 2}, {"sum_logits": -1.179882287979126, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.179882287979126, "logits_per_char": -0.589941143989563, "num_chars": 2}, {"sum_logits": -1.7900201082229614, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.7900201082229614, "logits_per_char": -0.8950100541114807, "num_chars": 2}, {"sum_logits": -1.6213700771331787, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.6213700771331787, "logits_per_char": -0.8106850385665894, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 791, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3249940872192383, "incorrect_loss_raw": 1.4411649306615193, "correct_loss_per_char": 0.6624970436096191, "incorrect_loss_per_char": 0.7205824653307596, "correct_loss_per_token": 1.3249940872192383, "incorrect_loss_per_token": 1.4411649306615193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1745620965957642, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -1.1745620965957642, "logits_per_char": -0.5872810482978821, "num_chars": 2}, {"sum_logits": -1.3249940872192383, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.3249940872192383, "logits_per_char": -0.6624970436096191, "num_chars": 2}, {"sum_logits": -1.6924175024032593, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6924175024032593, "logits_per_char": -0.8462087512016296, "num_chars": 2}, {"sum_logits": -1.4565151929855347, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.4565151929855347, "logits_per_char": -0.7282575964927673, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 792, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1038918495178223, "incorrect_loss_raw": 1.5536680221557617, "correct_loss_per_char": 0.5519459247589111, "incorrect_loss_per_char": 0.7768340110778809, "correct_loss_per_token": 1.1038918495178223, "incorrect_loss_per_token": 1.5536680221557617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1038918495178223, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.1038918495178223, "logits_per_char": -0.5519459247589111, "num_chars": 2}, {"sum_logits": -1.2138917446136475, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2138917446136475, "logits_per_char": -0.6069458723068237, "num_chars": 2}, {"sum_logits": -1.906229019165039, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.906229019165039, "logits_per_char": -0.9531145095825195, "num_chars": 2}, {"sum_logits": -1.5408833026885986, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.5408833026885986, "logits_per_char": -0.7704416513442993, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 793, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5738388299942017, "incorrect_loss_raw": 1.371799906094869, "correct_loss_per_char": 0.7869194149971008, "incorrect_loss_per_char": 0.6858999530474345, "correct_loss_per_token": 1.5738388299942017, "incorrect_loss_per_token": 1.371799906094869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1413649320602417, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.1413649320602417, "logits_per_char": -0.5706824660301208, "num_chars": 2}, {"sum_logits": -1.2438206672668457, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.2438206672668457, "logits_per_char": -0.6219103336334229, "num_chars": 2}, {"sum_logits": -1.7302141189575195, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.7302141189575195, "logits_per_char": -0.8651070594787598, "num_chars": 2}, {"sum_logits": -1.5738388299942017, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.5738388299942017, "logits_per_char": -0.7869194149971008, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 794, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.900359869003296, "incorrect_loss_raw": 1.2849667072296143, "correct_loss_per_char": 0.950179934501648, "incorrect_loss_per_char": 0.6424833536148071, "correct_loss_per_token": 1.900359869003296, "incorrect_loss_per_token": 1.2849667072296143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0856726169586182, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.0856726169586182, "logits_per_char": -0.5428363084793091, "num_chars": 2}, {"sum_logits": -1.2513266801834106, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.2513266801834106, "logits_per_char": -0.6256633400917053, "num_chars": 2}, {"sum_logits": -1.900359869003296, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.900359869003296, "logits_per_char": -0.950179934501648, "num_chars": 2}, {"sum_logits": -1.517900824546814, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.517900824546814, "logits_per_char": -0.758950412273407, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 795, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.540272831916809, "incorrect_loss_raw": 1.3725944757461548, "correct_loss_per_char": 0.7701364159584045, "incorrect_loss_per_char": 0.6862972378730774, "correct_loss_per_token": 1.540272831916809, "incorrect_loss_per_token": 1.3725944757461548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2031794786453247, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -1.2031794786453247, "logits_per_char": -0.6015897393226624, "num_chars": 2}, {"sum_logits": -1.2520161867141724, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.2520161867141724, "logits_per_char": -0.6260080933570862, "num_chars": 2}, {"sum_logits": -1.6625877618789673, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.6625877618789673, "logits_per_char": -0.8312938809394836, "num_chars": 2}, {"sum_logits": -1.540272831916809, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.540272831916809, "logits_per_char": -0.7701364159584045, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 796, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5606523752212524, "incorrect_loss_raw": 1.400521953900655, "correct_loss_per_char": 0.7803261876106262, "incorrect_loss_per_char": 0.7002609769503275, "correct_loss_per_token": 1.5606523752212524, "incorrect_loss_per_token": 1.400521953900655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1413419246673584, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.1413419246673584, "logits_per_char": -0.5706709623336792, "num_chars": 2}, {"sum_logits": -1.1686129570007324, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.1686129570007324, "logits_per_char": -0.5843064785003662, "num_chars": 2}, {"sum_logits": -1.8916109800338745, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.8916109800338745, "logits_per_char": -0.9458054900169373, "num_chars": 2}, {"sum_logits": -1.5606523752212524, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.5606523752212524, "logits_per_char": -0.7803261876106262, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 797, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2019155025482178, "incorrect_loss_raw": 1.5253077348073323, "correct_loss_per_char": 0.6009577512741089, "incorrect_loss_per_char": 0.7626538674036661, "correct_loss_per_token": 1.2019155025482178, "incorrect_loss_per_token": 1.5253077348073323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.083653211593628, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.083653211593628, "logits_per_char": -0.541826605796814, "num_chars": 2}, {"sum_logits": -1.2019155025482178, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.2019155025482178, "logits_per_char": -0.6009577512741089, "num_chars": 2}, {"sum_logits": -1.8807213306427002, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.8807213306427002, "logits_per_char": -0.9403606653213501, "num_chars": 2}, {"sum_logits": -1.611548662185669, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.611548662185669, "logits_per_char": -0.8057743310928345, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 798, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.717687726020813, "incorrect_loss_raw": 1.332047661145528, "correct_loss_per_char": 0.8588438630104065, "incorrect_loss_per_char": 0.666023830572764, "correct_loss_per_token": 1.717687726020813, "incorrect_loss_per_token": 1.332047661145528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0888395309448242, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -1.0888395309448242, "logits_per_char": -0.5444197654724121, "num_chars": 2}, {"sum_logits": -1.2582066059112549, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.2582066059112549, "logits_per_char": -0.6291033029556274, "num_chars": 2}, {"sum_logits": -1.6490968465805054, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.6490968465805054, "logits_per_char": -0.8245484232902527, "num_chars": 2}, {"sum_logits": -1.717687726020813, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.717687726020813, "logits_per_char": -0.8588438630104065, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 799, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.517105221748352, "incorrect_loss_raw": 1.3816285928090413, "correct_loss_per_char": 0.758552610874176, "incorrect_loss_per_char": 0.6908142964045206, "correct_loss_per_token": 1.517105221748352, "incorrect_loss_per_token": 1.3816285928090413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2282196283340454, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.2282196283340454, "logits_per_char": -0.6141098141670227, "num_chars": 2}, {"sum_logits": -1.2212083339691162, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -1.2212083339691162, "logits_per_char": -0.6106041669845581, "num_chars": 2}, {"sum_logits": -1.6954578161239624, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.6954578161239624, "logits_per_char": -0.8477289080619812, "num_chars": 2}, {"sum_logits": -1.517105221748352, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.517105221748352, "logits_per_char": -0.758552610874176, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 800, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5855824947357178, "incorrect_loss_raw": 1.3682742516199748, "correct_loss_per_char": 0.7927912473678589, "incorrect_loss_per_char": 0.6841371258099874, "correct_loss_per_token": 1.5855824947357178, "incorrect_loss_per_token": 1.3682742516199748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1783679723739624, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.1783679723739624, "logits_per_char": -0.5891839861869812, "num_chars": 2}, {"sum_logits": -1.1987239122390747, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.1987239122390747, "logits_per_char": -0.5993619561195374, "num_chars": 2}, {"sum_logits": -1.7277308702468872, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.7277308702468872, "logits_per_char": -0.8638654351234436, "num_chars": 2}, {"sum_logits": -1.5855824947357178, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.5855824947357178, "logits_per_char": -0.7927912473678589, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 801, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6913281679153442, "incorrect_loss_raw": 1.3210761149724324, "correct_loss_per_char": 0.8456640839576721, "incorrect_loss_per_char": 0.6605380574862162, "correct_loss_per_token": 1.6913281679153442, "incorrect_loss_per_token": 1.3210761149724324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.140260934829712, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -1.140260934829712, "logits_per_char": -0.570130467414856, "num_chars": 2}, {"sum_logits": -1.4324711561203003, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.4324711561203003, "logits_per_char": -0.7162355780601501, "num_chars": 2}, {"sum_logits": -1.6913281679153442, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.6913281679153442, "logits_per_char": -0.8456640839576721, "num_chars": 2}, {"sum_logits": -1.3904962539672852, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.3904962539672852, "logits_per_char": -0.6952481269836426, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 802, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2161391973495483, "incorrect_loss_raw": 1.4856454531351726, "correct_loss_per_char": 0.6080695986747742, "incorrect_loss_per_char": 0.7428227265675863, "correct_loss_per_token": 1.2161391973495483, "incorrect_loss_per_token": 1.4856454531351726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2502368688583374, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2502368688583374, "logits_per_char": -0.6251184344291687, "num_chars": 2}, {"sum_logits": -1.2161391973495483, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.2161391973495483, "logits_per_char": -0.6080695986747742, "num_chars": 2}, {"sum_logits": -1.775514841079712, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.775514841079712, "logits_per_char": -0.887757420539856, "num_chars": 2}, {"sum_logits": -1.4311846494674683, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.4311846494674683, "logits_per_char": -0.7155923247337341, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 803, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1702287197113037, "incorrect_loss_raw": 1.5563157002131145, "correct_loss_per_char": 0.5851143598556519, "incorrect_loss_per_char": 0.7781578501065572, "correct_loss_per_token": 1.1702287197113037, "incorrect_loss_per_token": 1.5563157002131145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0427405834197998, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0427405834197998, "logits_per_char": -0.5213702917098999, "num_chars": 2}, {"sum_logits": -1.1702287197113037, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.1702287197113037, "logits_per_char": -0.5851143598556519, "num_chars": 2}, {"sum_logits": -1.9217768907546997, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.9217768907546997, "logits_per_char": -0.9608884453773499, "num_chars": 2}, {"sum_logits": -1.7044296264648438, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.7044296264648438, "logits_per_char": -0.8522148132324219, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 804, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1486586332321167, "incorrect_loss_raw": 1.5320660670598347, "correct_loss_per_char": 0.5743293166160583, "incorrect_loss_per_char": 0.7660330335299174, "correct_loss_per_token": 1.1486586332321167, "incorrect_loss_per_token": 1.5320660670598347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1726939678192139, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1726939678192139, "logits_per_char": -0.5863469839096069, "num_chars": 2}, {"sum_logits": -1.1486586332321167, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.1486586332321167, "logits_per_char": -0.5743293166160583, "num_chars": 2}, {"sum_logits": -1.8732576370239258, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.8732576370239258, "logits_per_char": -0.9366288185119629, "num_chars": 2}, {"sum_logits": -1.5502465963363647, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.5502465963363647, "logits_per_char": -0.7751232981681824, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 805, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9850561618804932, "incorrect_loss_raw": 1.2667141358057659, "correct_loss_per_char": 0.9925280809402466, "incorrect_loss_per_char": 0.6333570679028829, "correct_loss_per_token": 1.9850561618804932, "incorrect_loss_per_token": 1.2667141358057659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.114234447479248, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": true, "logits_per_token": -1.114234447479248, "logits_per_char": -0.557117223739624, "num_chars": 2}, {"sum_logits": -1.2240396738052368, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.2240396738052368, "logits_per_char": -0.6120198369026184, "num_chars": 2}, {"sum_logits": -1.9850561618804932, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.9850561618804932, "logits_per_char": -0.9925280809402466, "num_chars": 2}, {"sum_logits": -1.4618682861328125, "num_tokens": 1, "num_tokens_all": 1112, "is_greedy": false, "logits_per_token": -1.4618682861328125, "logits_per_char": -0.7309341430664062, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 806, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6995820999145508, "incorrect_loss_raw": 1.3672996759414673, "correct_loss_per_char": 0.8497910499572754, "incorrect_loss_per_char": 0.6836498379707336, "correct_loss_per_token": 1.6995820999145508, "incorrect_loss_per_token": 1.3672996759414673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0488455295562744, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0488455295562744, "logits_per_char": -0.5244227647781372, "num_chars": 2}, {"sum_logits": -1.214341163635254, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.214341163635254, "logits_per_char": -0.607170581817627, "num_chars": 2}, {"sum_logits": -1.8387123346328735, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8387123346328735, "logits_per_char": -0.9193561673164368, "num_chars": 2}, {"sum_logits": -1.6995820999145508, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.6995820999145508, "logits_per_char": -0.8497910499572754, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 807, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7619606256484985, "incorrect_loss_raw": 1.325421651204427, "correct_loss_per_char": 0.8809803128242493, "incorrect_loss_per_char": 0.6627108256022135, "correct_loss_per_token": 1.7619606256484985, "incorrect_loss_per_token": 1.325421651204427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1207051277160645, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -1.1207051277160645, "logits_per_char": -0.5603525638580322, "num_chars": 2}, {"sum_logits": -1.1754153966903687, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.1754153966903687, "logits_per_char": -0.5877076983451843, "num_chars": 2}, {"sum_logits": -1.7619606256484985, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.7619606256484985, "logits_per_char": -0.8809803128242493, "num_chars": 2}, {"sum_logits": -1.6801444292068481, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.6801444292068481, "logits_per_char": -0.8400722146034241, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 808, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.183474063873291, "incorrect_loss_raw": 1.5082582235336304, "correct_loss_per_char": 0.5917370319366455, "incorrect_loss_per_char": 0.7541291117668152, "correct_loss_per_token": 1.183474063873291, "incorrect_loss_per_token": 1.5082582235336304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.183474063873291, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.183474063873291, "logits_per_char": -0.5917370319366455, "num_chars": 2}, {"sum_logits": -1.216116189956665, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.216116189956665, "logits_per_char": -0.6080580949783325, "num_chars": 2}, {"sum_logits": -1.8197691440582275, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8197691440582275, "logits_per_char": -0.9098845720291138, "num_chars": 2}, {"sum_logits": -1.4888893365859985, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.4888893365859985, "logits_per_char": -0.7444446682929993, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 809, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1591870784759521, "incorrect_loss_raw": 1.554584304491679, "correct_loss_per_char": 0.5795935392379761, "incorrect_loss_per_char": 0.7772921522458395, "correct_loss_per_token": 1.1591870784759521, "incorrect_loss_per_token": 1.554584304491679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0934717655181885, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -1.0934717655181885, "logits_per_char": -0.5467358827590942, "num_chars": 2}, {"sum_logits": -1.1591870784759521, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1591870784759521, "logits_per_char": -0.5795935392379761, "num_chars": 2}, {"sum_logits": -1.9861191511154175, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.9861191511154175, "logits_per_char": -0.9930595755577087, "num_chars": 2}, {"sum_logits": -1.5841619968414307, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.5841619968414307, "logits_per_char": -0.7920809984207153, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 810, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0081205368041992, "incorrect_loss_raw": 1.6161329746246338, "correct_loss_per_char": 0.5040602684020996, "incorrect_loss_per_char": 0.8080664873123169, "correct_loss_per_token": 1.0081205368041992, "incorrect_loss_per_token": 1.6161329746246338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0081205368041992, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0081205368041992, "logits_per_char": -0.5040602684020996, "num_chars": 2}, {"sum_logits": -1.219366431236267, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.219366431236267, "logits_per_char": -0.6096832156181335, "num_chars": 2}, {"sum_logits": -1.977327585220337, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.977327585220337, "logits_per_char": -0.9886637926101685, "num_chars": 2}, {"sum_logits": -1.6517049074172974, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.6517049074172974, "logits_per_char": -0.8258524537086487, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 811, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6578863859176636, "incorrect_loss_raw": 1.3368050654729207, "correct_loss_per_char": 0.8289431929588318, "incorrect_loss_per_char": 0.6684025327364603, "correct_loss_per_token": 1.6578863859176636, "incorrect_loss_per_token": 1.3368050654729207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1132113933563232, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.1132113933563232, "logits_per_char": -0.5566056966781616, "num_chars": 2}, {"sum_logits": -1.3662294149398804, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3662294149398804, "logits_per_char": -0.6831147074699402, "num_chars": 2}, {"sum_logits": -1.6578863859176636, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6578863859176636, "logits_per_char": -0.8289431929588318, "num_chars": 2}, {"sum_logits": -1.5309743881225586, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5309743881225586, "logits_per_char": -0.7654871940612793, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 812, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1019362211227417, "incorrect_loss_raw": 1.567334810892741, "correct_loss_per_char": 0.5509681105613708, "incorrect_loss_per_char": 0.7836674054463705, "correct_loss_per_token": 1.1019362211227417, "incorrect_loss_per_token": 1.567334810892741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1019362211227417, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.1019362211227417, "logits_per_char": -0.5509681105613708, "num_chars": 2}, {"sum_logits": -1.1562371253967285, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.1562371253967285, "logits_per_char": -0.5781185626983643, "num_chars": 2}, {"sum_logits": -1.9138469696044922, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.9138469696044922, "logits_per_char": -0.9569234848022461, "num_chars": 2}, {"sum_logits": -1.631920337677002, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.631920337677002, "logits_per_char": -0.815960168838501, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 813, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0013724565505981, "incorrect_loss_raw": 1.6012026468912761, "correct_loss_per_char": 0.5006862282752991, "incorrect_loss_per_char": 0.8006013234456381, "correct_loss_per_token": 1.0013724565505981, "incorrect_loss_per_token": 1.6012026468912761, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0013724565505981, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -1.0013724565505981, "logits_per_char": -0.5006862282752991, "num_chars": 2}, {"sum_logits": -1.309441328048706, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.309441328048706, "logits_per_char": -0.654720664024353, "num_chars": 2}, {"sum_logits": -1.812917709350586, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.812917709350586, "logits_per_char": -0.906458854675293, "num_chars": 2}, {"sum_logits": -1.6812489032745361, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.6812489032745361, "logits_per_char": -0.8406244516372681, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 814, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.150107502937317, "incorrect_loss_raw": 1.5247783660888672, "correct_loss_per_char": 0.5750537514686584, "incorrect_loss_per_char": 0.7623891830444336, "correct_loss_per_token": 1.150107502937317, "incorrect_loss_per_token": 1.5247783660888672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.150107502937317, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -1.150107502937317, "logits_per_char": -0.5750537514686584, "num_chars": 2}, {"sum_logits": -1.1867954730987549, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.1867954730987549, "logits_per_char": -0.5933977365493774, "num_chars": 2}, {"sum_logits": -1.8372420072555542, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.8372420072555542, "logits_per_char": -0.9186210036277771, "num_chars": 2}, {"sum_logits": -1.5502976179122925, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.5502976179122925, "logits_per_char": -0.7751488089561462, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 815, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.194333553314209, "incorrect_loss_raw": 1.4759031136830647, "correct_loss_per_char": 0.5971667766571045, "incorrect_loss_per_char": 0.7379515568415324, "correct_loss_per_token": 1.194333553314209, "incorrect_loss_per_token": 1.4759031136830647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3567023277282715, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.3567023277282715, "logits_per_char": -0.6783511638641357, "num_chars": 2}, {"sum_logits": -1.194333553314209, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -1.194333553314209, "logits_per_char": -0.5971667766571045, "num_chars": 2}, {"sum_logits": -1.5984363555908203, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.5984363555908203, "logits_per_char": -0.7992181777954102, "num_chars": 2}, {"sum_logits": -1.4725706577301025, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.4725706577301025, "logits_per_char": -0.7362853288650513, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 816, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1368372440338135, "incorrect_loss_raw": 1.5416026910146077, "correct_loss_per_char": 0.5684186220169067, "incorrect_loss_per_char": 0.7708013455073038, "correct_loss_per_token": 1.1368372440338135, "incorrect_loss_per_token": 1.5416026910146077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1368372440338135, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.1368372440338135, "logits_per_char": -0.5684186220169067, "num_chars": 2}, {"sum_logits": -1.2213584184646606, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2213584184646606, "logits_per_char": -0.6106792092323303, "num_chars": 2}, {"sum_logits": -1.955925703048706, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.955925703048706, "logits_per_char": -0.977962851524353, "num_chars": 2}, {"sum_logits": -1.4475239515304565, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.4475239515304565, "logits_per_char": -0.7237619757652283, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 817, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1468956470489502, "incorrect_loss_raw": 1.5010958115259807, "correct_loss_per_char": 0.5734478235244751, "incorrect_loss_per_char": 0.7505479057629904, "correct_loss_per_token": 1.1468956470489502, "incorrect_loss_per_token": 1.5010958115259807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1468956470489502, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.1468956470489502, "logits_per_char": -0.5734478235244751, "num_chars": 2}, {"sum_logits": -1.3525899648666382, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.3525899648666382, "logits_per_char": -0.6762949824333191, "num_chars": 2}, {"sum_logits": -1.6349480152130127, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.6349480152130127, "logits_per_char": -0.8174740076065063, "num_chars": 2}, {"sum_logits": -1.515749454498291, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.515749454498291, "logits_per_char": -0.7578747272491455, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 818, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6735273599624634, "incorrect_loss_raw": 1.4024241367975872, "correct_loss_per_char": 0.8367636799812317, "incorrect_loss_per_char": 0.7012120683987936, "correct_loss_per_token": 1.6735273599624634, "incorrect_loss_per_token": 1.4024241367975872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9879587888717651, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -0.9879587888717651, "logits_per_char": -0.49397939443588257, "num_chars": 2}, {"sum_logits": -1.2181768417358398, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.2181768417358398, "logits_per_char": -0.6090884208679199, "num_chars": 2}, {"sum_logits": -2.0011367797851562, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -2.0011367797851562, "logits_per_char": -1.0005683898925781, "num_chars": 2}, {"sum_logits": -1.6735273599624634, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.6735273599624634, "logits_per_char": -0.8367636799812317, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 819, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2029030323028564, "incorrect_loss_raw": 1.5190032720565796, "correct_loss_per_char": 0.6014515161514282, "incorrect_loss_per_char": 0.7595016360282898, "correct_loss_per_token": 1.2029030323028564, "incorrect_loss_per_token": 1.5190032720565796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1057894229888916, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.1057894229888916, "logits_per_char": -0.5528947114944458, "num_chars": 2}, {"sum_logits": -1.2029030323028564, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2029030323028564, "logits_per_char": -0.6014515161514282, "num_chars": 2}, {"sum_logits": -1.8844540119171143, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.8844540119171143, "logits_per_char": -0.9422270059585571, "num_chars": 2}, {"sum_logits": -1.566766381263733, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.566766381263733, "logits_per_char": -0.7833831906318665, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 820, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.521090030670166, "incorrect_loss_raw": 1.3842743635177612, "correct_loss_per_char": 0.760545015335083, "incorrect_loss_per_char": 0.6921371817588806, "correct_loss_per_token": 1.521090030670166, "incorrect_loss_per_token": 1.3842743635177612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1598950624465942, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.1598950624465942, "logits_per_char": -0.5799475312232971, "num_chars": 2}, {"sum_logits": -1.2873637676239014, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.2873637676239014, "logits_per_char": -0.6436818838119507, "num_chars": 2}, {"sum_logits": -1.705564260482788, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.705564260482788, "logits_per_char": -0.852782130241394, "num_chars": 2}, {"sum_logits": -1.521090030670166, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.521090030670166, "logits_per_char": -0.760545015335083, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 821, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9427288770675659, "incorrect_loss_raw": 1.7201478878657024, "correct_loss_per_char": 0.47136443853378296, "incorrect_loss_per_char": 0.8600739439328512, "correct_loss_per_token": 0.9427288770675659, "incorrect_loss_per_token": 1.7201478878657024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9427288770675659, "num_tokens": 1, "num_tokens_all": 1160, "is_greedy": true, "logits_per_token": -0.9427288770675659, "logits_per_char": -0.47136443853378296, "num_chars": 2}, {"sum_logits": -1.0930795669555664, "num_tokens": 1, "num_tokens_all": 1160, "is_greedy": false, "logits_per_token": -1.0930795669555664, "logits_per_char": -0.5465397834777832, "num_chars": 2}, {"sum_logits": -2.078157901763916, "num_tokens": 1, "num_tokens_all": 1160, "is_greedy": false, "logits_per_token": -2.078157901763916, "logits_per_char": -1.039078950881958, "num_chars": 2}, {"sum_logits": -1.9892061948776245, "num_tokens": 1, "num_tokens_all": 1160, "is_greedy": false, "logits_per_token": -1.9892061948776245, "logits_per_char": -0.9946030974388123, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 822, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0172977447509766, "incorrect_loss_raw": 1.277337908744812, "correct_loss_per_char": 1.0086488723754883, "incorrect_loss_per_char": 0.638668954372406, "correct_loss_per_token": 2.0172977447509766, "incorrect_loss_per_token": 1.277337908744812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0602182149887085, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.0602182149887085, "logits_per_char": -0.5301091074943542, "num_chars": 2}, {"sum_logits": -1.1637945175170898, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1637945175170898, "logits_per_char": -0.5818972587585449, "num_chars": 2}, {"sum_logits": -2.0172977447509766, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -2.0172977447509766, "logits_per_char": -1.0086488723754883, "num_chars": 2}, {"sum_logits": -1.6080009937286377, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.6080009937286377, "logits_per_char": -0.8040004968643188, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 823, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6647599935531616, "incorrect_loss_raw": 1.3716761271158855, "correct_loss_per_char": 0.8323799967765808, "incorrect_loss_per_char": 0.6858380635579427, "correct_loss_per_token": 1.6647599935531616, "incorrect_loss_per_token": 1.3716761271158855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0782496929168701, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -1.0782496929168701, "logits_per_char": -0.5391248464584351, "num_chars": 2}, {"sum_logits": -1.2328059673309326, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.2328059673309326, "logits_per_char": -0.6164029836654663, "num_chars": 2}, {"sum_logits": -1.8039727210998535, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.8039727210998535, "logits_per_char": -0.9019863605499268, "num_chars": 2}, {"sum_logits": -1.6647599935531616, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.6647599935531616, "logits_per_char": -0.8323799967765808, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 824, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1614505052566528, "incorrect_loss_raw": 1.5306394894917805, "correct_loss_per_char": 0.5807252526283264, "incorrect_loss_per_char": 0.7653197447458903, "correct_loss_per_token": 1.1614505052566528, "incorrect_loss_per_token": 1.5306394894917805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1614505052566528, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.1614505052566528, "logits_per_char": -0.5807252526283264, "num_chars": 2}, {"sum_logits": -1.1431554555892944, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -1.1431554555892944, "logits_per_char": -0.5715777277946472, "num_chars": 2}, {"sum_logits": -1.8597853183746338, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8597853183746338, "logits_per_char": -0.9298926591873169, "num_chars": 2}, {"sum_logits": -1.5889776945114136, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.5889776945114136, "logits_per_char": -0.7944888472557068, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 825, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9733459949493408, "incorrect_loss_raw": 1.2925682465235393, "correct_loss_per_char": 0.9866729974746704, "incorrect_loss_per_char": 0.6462841232617696, "correct_loss_per_token": 1.9733459949493408, "incorrect_loss_per_token": 1.2925682465235393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0449175834655762, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.0449175834655762, "logits_per_char": -0.5224587917327881, "num_chars": 2}, {"sum_logits": -1.15767502784729, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.15767502784729, "logits_per_char": -0.578837513923645, "num_chars": 2}, {"sum_logits": -1.9733459949493408, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.9733459949493408, "logits_per_char": -0.9866729974746704, "num_chars": 2}, {"sum_logits": -1.6751121282577515, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.6751121282577515, "logits_per_char": -0.8375560641288757, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 826, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.806483268737793, "incorrect_loss_raw": 1.3555911382039387, "correct_loss_per_char": 0.9032416343688965, "incorrect_loss_per_char": 0.6777955691019694, "correct_loss_per_token": 1.806483268737793, "incorrect_loss_per_token": 1.3555911382039387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.919166088104248, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.919166088104248, "logits_per_char": -0.459583044052124, "num_chars": 2}, {"sum_logits": -1.3438913822174072, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.3438913822174072, "logits_per_char": -0.6719456911087036, "num_chars": 2}, {"sum_logits": -1.8037159442901611, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.8037159442901611, "logits_per_char": -0.9018579721450806, "num_chars": 2}, {"sum_logits": -1.806483268737793, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.806483268737793, "logits_per_char": -0.9032416343688965, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 827, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.405136227607727, "incorrect_loss_raw": 1.4093978802363079, "correct_loss_per_char": 0.7025681138038635, "incorrect_loss_per_char": 0.7046989401181539, "correct_loss_per_token": 1.405136227607727, "incorrect_loss_per_token": 1.4093978802363079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2085802555084229, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.2085802555084229, "logits_per_char": -0.6042901277542114, "num_chars": 2}, {"sum_logits": -1.405136227607727, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.405136227607727, "logits_per_char": -0.7025681138038635, "num_chars": 2}, {"sum_logits": -1.6986371278762817, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6986371278762817, "logits_per_char": -0.8493185639381409, "num_chars": 2}, {"sum_logits": -1.3209762573242188, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3209762573242188, "logits_per_char": -0.6604881286621094, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 828, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2597256898880005, "incorrect_loss_raw": 1.4828710158665974, "correct_loss_per_char": 0.6298628449440002, "incorrect_loss_per_char": 0.7414355079332987, "correct_loss_per_token": 1.2597256898880005, "incorrect_loss_per_token": 1.4828710158665974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0998060703277588, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -1.0998060703277588, "logits_per_char": -0.5499030351638794, "num_chars": 2}, {"sum_logits": -1.2597256898880005, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.2597256898880005, "logits_per_char": -0.6298628449440002, "num_chars": 2}, {"sum_logits": -1.6709052324295044, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.6709052324295044, "logits_per_char": -0.8354526162147522, "num_chars": 2}, {"sum_logits": -1.6779017448425293, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.6779017448425293, "logits_per_char": -0.8389508724212646, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 829, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0421671867370605, "incorrect_loss_raw": 1.590055783589681, "correct_loss_per_char": 0.5210835933685303, "incorrect_loss_per_char": 0.7950278917948405, "correct_loss_per_token": 1.0421671867370605, "incorrect_loss_per_token": 1.590055783589681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0421671867370605, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.0421671867370605, "logits_per_char": -0.5210835933685303, "num_chars": 2}, {"sum_logits": -1.2180774211883545, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.2180774211883545, "logits_per_char": -0.6090387105941772, "num_chars": 2}, {"sum_logits": -1.9409241676330566, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9409241676330566, "logits_per_char": -0.9704620838165283, "num_chars": 2}, {"sum_logits": -1.6111657619476318, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.6111657619476318, "logits_per_char": -0.8055828809738159, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 830, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.167827844619751, "incorrect_loss_raw": 1.5252414147059123, "correct_loss_per_char": 0.5839139223098755, "incorrect_loss_per_char": 0.7626207073529562, "correct_loss_per_token": 1.167827844619751, "incorrect_loss_per_token": 1.5252414147059123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.167827844619751, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.167827844619751, "logits_per_char": -0.5839139223098755, "num_chars": 2}, {"sum_logits": -1.1580113172531128, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.1580113172531128, "logits_per_char": -0.5790056586265564, "num_chars": 2}, {"sum_logits": -1.8715996742248535, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.8715996742248535, "logits_per_char": -0.9357998371124268, "num_chars": 2}, {"sum_logits": -1.5461132526397705, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.5461132526397705, "logits_per_char": -0.7730566263198853, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 831, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5965971946716309, "incorrect_loss_raw": 1.4244690736134846, "correct_loss_per_char": 0.7982985973358154, "incorrect_loss_per_char": 0.7122345368067423, "correct_loss_per_token": 1.5965971946716309, "incorrect_loss_per_token": 1.4244690736134846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0164971351623535, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.0164971351623535, "logits_per_char": -0.5082485675811768, "num_chars": 2}, {"sum_logits": -1.212327241897583, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.212327241897583, "logits_per_char": -0.6061636209487915, "num_chars": 2}, {"sum_logits": -2.0445828437805176, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -2.0445828437805176, "logits_per_char": -1.0222914218902588, "num_chars": 2}, {"sum_logits": -1.5965971946716309, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.5965971946716309, "logits_per_char": -0.7982985973358154, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 832, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5803415775299072, "incorrect_loss_raw": 1.4071298440297444, "correct_loss_per_char": 0.7901707887649536, "incorrect_loss_per_char": 0.7035649220148722, "correct_loss_per_token": 1.5803415775299072, "incorrect_loss_per_token": 1.4071298440297444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.119995355606079, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.119995355606079, "logits_per_char": -0.5599976778030396, "num_chars": 2}, {"sum_logits": -1.1605629920959473, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1605629920959473, "logits_per_char": -0.5802814960479736, "num_chars": 2}, {"sum_logits": -1.940831184387207, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.940831184387207, "logits_per_char": -0.9704155921936035, "num_chars": 2}, {"sum_logits": -1.5803415775299072, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5803415775299072, "logits_per_char": -0.7901707887649536, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 833, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4886729717254639, "incorrect_loss_raw": 1.3835458755493164, "correct_loss_per_char": 0.7443364858627319, "incorrect_loss_per_char": 0.6917729377746582, "correct_loss_per_token": 1.4886729717254639, "incorrect_loss_per_token": 1.3835458755493164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2365949153900146, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -1.2365949153900146, "logits_per_char": -0.6182974576950073, "num_chars": 2}, {"sum_logits": -1.2418694496154785, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.2418694496154785, "logits_per_char": -0.6209347248077393, "num_chars": 2}, {"sum_logits": -1.672173261642456, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.672173261642456, "logits_per_char": -0.836086630821228, "num_chars": 2}, {"sum_logits": -1.4886729717254639, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.4886729717254639, "logits_per_char": -0.7443364858627319, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 834, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0252277851104736, "incorrect_loss_raw": 1.3034368356068928, "correct_loss_per_char": 1.0126138925552368, "incorrect_loss_per_char": 0.6517184178034464, "correct_loss_per_token": 2.0252277851104736, "incorrect_loss_per_token": 1.3034368356068928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9882886409759521, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": true, "logits_per_token": -0.9882886409759521, "logits_per_char": -0.4941443204879761, "num_chars": 2}, {"sum_logits": -1.1439437866210938, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.1439437866210938, "logits_per_char": -0.5719718933105469, "num_chars": 2}, {"sum_logits": -2.0252277851104736, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -2.0252277851104736, "logits_per_char": -1.0126138925552368, "num_chars": 2}, {"sum_logits": -1.7780780792236328, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.7780780792236328, "logits_per_char": -0.8890390396118164, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 835, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1713719367980957, "incorrect_loss_raw": 1.5125234127044678, "correct_loss_per_char": 0.5856859683990479, "incorrect_loss_per_char": 0.7562617063522339, "correct_loss_per_token": 1.1713719367980957, "incorrect_loss_per_token": 1.5125234127044678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2072609663009644, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2072609663009644, "logits_per_char": -0.6036304831504822, "num_chars": 2}, {"sum_logits": -1.1713719367980957, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1713719367980957, "logits_per_char": -0.5856859683990479, "num_chars": 2}, {"sum_logits": -1.8391615152359009, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.8391615152359009, "logits_per_char": -0.9195807576179504, "num_chars": 2}, {"sum_logits": -1.491147756576538, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.491147756576538, "logits_per_char": -0.745573878288269, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 836, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.013041615486145, "incorrect_loss_raw": 1.6248639424641926, "correct_loss_per_char": 0.5065208077430725, "incorrect_loss_per_char": 0.8124319712320963, "correct_loss_per_token": 1.013041615486145, "incorrect_loss_per_token": 1.6248639424641926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.013041615486145, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": true, "logits_per_token": -1.013041615486145, "logits_per_char": -0.5065208077430725, "num_chars": 2}, {"sum_logits": -1.2316179275512695, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.2316179275512695, "logits_per_char": -0.6158089637756348, "num_chars": 2}, {"sum_logits": -2.059263229370117, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -2.059263229370117, "logits_per_char": -1.0296316146850586, "num_chars": 2}, {"sum_logits": -1.5837106704711914, "num_tokens": 1, "num_tokens_all": 1120, "is_greedy": false, "logits_per_token": -1.5837106704711914, "logits_per_char": -0.7918553352355957, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 837, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.654367446899414, "incorrect_loss_raw": 1.3859749635060628, "correct_loss_per_char": 0.827183723449707, "incorrect_loss_per_char": 0.6929874817530314, "correct_loss_per_token": 1.654367446899414, "incorrect_loss_per_token": 1.3859749635060628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1133739948272705, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.1133739948272705, "logits_per_char": -0.5566869974136353, "num_chars": 2}, {"sum_logits": -1.140031337738037, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.140031337738037, "logits_per_char": -0.5700156688690186, "num_chars": 2}, {"sum_logits": -1.9045195579528809, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.9045195579528809, "logits_per_char": -0.9522597789764404, "num_chars": 2}, {"sum_logits": -1.654367446899414, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.654367446899414, "logits_per_char": -0.827183723449707, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 838, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6603754758834839, "incorrect_loss_raw": 1.3281296491622925, "correct_loss_per_char": 0.8301877379417419, "incorrect_loss_per_char": 0.6640648245811462, "correct_loss_per_token": 1.6603754758834839, "incorrect_loss_per_token": 1.3281296491622925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1483893394470215, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -1.1483893394470215, "logits_per_char": -0.5741946697235107, "num_chars": 2}, {"sum_logits": -1.3819515705108643, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.3819515705108643, "logits_per_char": -0.6909757852554321, "num_chars": 2}, {"sum_logits": -1.6603754758834839, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.6603754758834839, "logits_per_char": -0.8301877379417419, "num_chars": 2}, {"sum_logits": -1.4540480375289917, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.4540480375289917, "logits_per_char": -0.7270240187644958, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 839, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9733234643936157, "incorrect_loss_raw": 1.6609614690144856, "correct_loss_per_char": 0.48666173219680786, "incorrect_loss_per_char": 0.8304807345072428, "correct_loss_per_token": 0.9733234643936157, "incorrect_loss_per_token": 1.6609614690144856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9733234643936157, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": true, "logits_per_token": -0.9733234643936157, "logits_per_char": -0.48666173219680786, "num_chars": 2}, {"sum_logits": -1.181049108505249, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.181049108505249, "logits_per_char": -0.5905245542526245, "num_chars": 2}, {"sum_logits": -2.123716354370117, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -2.123716354370117, "logits_per_char": -1.0618581771850586, "num_chars": 2}, {"sum_logits": -1.6781189441680908, "num_tokens": 1, "num_tokens_all": 1125, "is_greedy": false, "logits_per_token": -1.6781189441680908, "logits_per_char": -0.8390594720840454, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 840, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0054845809936523, "incorrect_loss_raw": 1.259398341178894, "correct_loss_per_char": 1.0027422904968262, "incorrect_loss_per_char": 0.629699170589447, "correct_loss_per_token": 2.0054845809936523, "incorrect_loss_per_token": 1.259398341178894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1597496271133423, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.1597496271133423, "logits_per_char": -0.5798748135566711, "num_chars": 2}, {"sum_logits": -1.169119954109192, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.169119954109192, "logits_per_char": -0.584559977054596, "num_chars": 2}, {"sum_logits": -2.0054845809936523, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -2.0054845809936523, "logits_per_char": -1.0027422904968262, "num_chars": 2}, {"sum_logits": -1.449325442314148, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.449325442314148, "logits_per_char": -0.724662721157074, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 841, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9397414922714233, "incorrect_loss_raw": 1.701566497484843, "correct_loss_per_char": 0.46987074613571167, "incorrect_loss_per_char": 0.8507832487424215, "correct_loss_per_token": 0.9397414922714233, "incorrect_loss_per_token": 1.701566497484843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9397414922714233, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": true, "logits_per_token": -0.9397414922714233, "logits_per_char": -0.46987074613571167, "num_chars": 2}, {"sum_logits": -1.1355375051498413, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.1355375051498413, "logits_per_char": -0.5677687525749207, "num_chars": 2}, {"sum_logits": -2.0858407020568848, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -2.0858407020568848, "logits_per_char": -1.0429203510284424, "num_chars": 2}, {"sum_logits": -1.8833212852478027, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.8833212852478027, "logits_per_char": -0.9416606426239014, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 842, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.055367946624756, "incorrect_loss_raw": 1.3120860656102498, "correct_loss_per_char": 1.027683973312378, "incorrect_loss_per_char": 0.6560430328051249, "correct_loss_per_token": 2.055367946624756, "incorrect_loss_per_token": 1.3120860656102498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.919182300567627, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": true, "logits_per_token": -0.919182300567627, "logits_per_char": -0.4595911502838135, "num_chars": 2}, {"sum_logits": -1.2148139476776123, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -1.2148139476776123, "logits_per_char": -0.6074069738388062, "num_chars": 2}, {"sum_logits": -2.055367946624756, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -2.055367946624756, "logits_per_char": -1.027683973312378, "num_chars": 2}, {"sum_logits": -1.8022619485855103, "num_tokens": 1, "num_tokens_all": 1138, "is_greedy": false, "logits_per_token": -1.8022619485855103, "logits_per_char": -0.9011309742927551, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 843, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8337699174880981, "incorrect_loss_raw": 1.2954365015029907, "correct_loss_per_char": 0.9168849587440491, "incorrect_loss_per_char": 0.6477182507514954, "correct_loss_per_token": 1.8337699174880981, "incorrect_loss_per_token": 1.2954365015029907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1914969682693481, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1914969682693481, "logits_per_char": -0.5957484841346741, "num_chars": 2}, {"sum_logits": -1.1828150749206543, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.1828150749206543, "logits_per_char": -0.5914075374603271, "num_chars": 2}, {"sum_logits": -1.8337699174880981, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.8337699174880981, "logits_per_char": -0.9168849587440491, "num_chars": 2}, {"sum_logits": -1.5119974613189697, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.5119974613189697, "logits_per_char": -0.7559987306594849, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 844, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.030064344406128, "incorrect_loss_raw": 1.6236851215362549, "correct_loss_per_char": 0.515032172203064, "incorrect_loss_per_char": 0.8118425607681274, "correct_loss_per_token": 1.030064344406128, "incorrect_loss_per_token": 1.6236851215362549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.030064344406128, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.030064344406128, "logits_per_char": -0.515032172203064, "num_chars": 2}, {"sum_logits": -1.1396996974945068, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1396996974945068, "logits_per_char": -0.5698498487472534, "num_chars": 2}, {"sum_logits": -2.044632911682129, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -2.044632911682129, "logits_per_char": -1.0223164558410645, "num_chars": 2}, {"sum_logits": -1.686722755432129, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.686722755432129, "logits_per_char": -0.8433613777160645, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 845, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7890384197235107, "incorrect_loss_raw": 1.3605212767918904, "correct_loss_per_char": 0.8945192098617554, "incorrect_loss_per_char": 0.6802606383959452, "correct_loss_per_token": 1.7890384197235107, "incorrect_loss_per_token": 1.3605212767918904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0331424474716187, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -1.0331424474716187, "logits_per_char": -0.5165712237358093, "num_chars": 2}, {"sum_logits": -1.1427173614501953, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.1427173614501953, "logits_per_char": -0.5713586807250977, "num_chars": 2}, {"sum_logits": -1.9057040214538574, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.9057040214538574, "logits_per_char": -0.9528520107269287, "num_chars": 2}, {"sum_logits": -1.7890384197235107, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.7890384197235107, "logits_per_char": -0.8945192098617554, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 846, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1222180128097534, "incorrect_loss_raw": 1.5823113123575847, "correct_loss_per_char": 0.5611090064048767, "incorrect_loss_per_char": 0.7911556561787924, "correct_loss_per_token": 1.1222180128097534, "incorrect_loss_per_token": 1.5823113123575847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0664708614349365, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -1.0664708614349365, "logits_per_char": -0.5332354307174683, "num_chars": 2}, {"sum_logits": -1.1222180128097534, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.1222180128097534, "logits_per_char": -0.5611090064048767, "num_chars": 2}, {"sum_logits": -1.9630634784698486, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.9630634784698486, "logits_per_char": -0.9815317392349243, "num_chars": 2}, {"sum_logits": -1.7173995971679688, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.7173995971679688, "logits_per_char": -0.8586997985839844, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 847, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6250040531158447, "incorrect_loss_raw": 1.404333233833313, "correct_loss_per_char": 0.8125020265579224, "incorrect_loss_per_char": 0.7021666169166565, "correct_loss_per_token": 1.6250040531158447, "incorrect_loss_per_token": 1.404333233833313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0470528602600098, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.0470528602600098, "logits_per_char": -0.5235264301300049, "num_chars": 2}, {"sum_logits": -1.1940804719924927, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.1940804719924927, "logits_per_char": -0.5970402359962463, "num_chars": 2}, {"sum_logits": -1.9718663692474365, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.9718663692474365, "logits_per_char": -0.9859331846237183, "num_chars": 2}, {"sum_logits": -1.6250040531158447, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.6250040531158447, "logits_per_char": -0.8125020265579224, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 848, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0505915880203247, "incorrect_loss_raw": 1.5698659022649128, "correct_loss_per_char": 0.5252957940101624, "incorrect_loss_per_char": 0.7849329511324564, "correct_loss_per_token": 1.0505915880203247, "incorrect_loss_per_token": 1.5698659022649128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0505915880203247, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -1.0505915880203247, "logits_per_char": -0.5252957940101624, "num_chars": 2}, {"sum_logits": -1.2745400667190552, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.2745400667190552, "logits_per_char": -0.6372700333595276, "num_chars": 2}, {"sum_logits": -1.8378984928131104, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.8378984928131104, "logits_per_char": -0.9189492464065552, "num_chars": 2}, {"sum_logits": -1.5971591472625732, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.5971591472625732, "logits_per_char": -0.7985795736312866, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 849, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9891973733901978, "incorrect_loss_raw": 1.6297828356424968, "correct_loss_per_char": 0.4945986866950989, "incorrect_loss_per_char": 0.8148914178212484, "correct_loss_per_token": 0.9891973733901978, "incorrect_loss_per_token": 1.6297828356424968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9891973733901978, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": true, "logits_per_token": -0.9891973733901978, "logits_per_char": -0.4945986866950989, "num_chars": 2}, {"sum_logits": -1.2034544944763184, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.2034544944763184, "logits_per_char": -0.6017272472381592, "num_chars": 2}, {"sum_logits": -1.9879367351531982, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.9879367351531982, "logits_per_char": -0.9939683675765991, "num_chars": 2}, {"sum_logits": -1.6979572772979736, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.6979572772979736, "logits_per_char": -0.8489786386489868, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 850, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4357956647872925, "incorrect_loss_raw": 1.3890105088551838, "correct_loss_per_char": 0.7178978323936462, "incorrect_loss_per_char": 0.6945052544275919, "correct_loss_per_token": 1.4357956647872925, "incorrect_loss_per_token": 1.3890105088551838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2183337211608887, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -1.2183337211608887, "logits_per_char": -0.6091668605804443, "num_chars": 2}, {"sum_logits": -1.4357956647872925, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.4357956647872925, "logits_per_char": -0.7178978323936462, "num_chars": 2}, {"sum_logits": -1.5593528747558594, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.5593528747558594, "logits_per_char": -0.7796764373779297, "num_chars": 2}, {"sum_logits": -1.3893449306488037, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.3893449306488037, "logits_per_char": -0.6946724653244019, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 851, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1102280616760254, "incorrect_loss_raw": 1.5565952062606812, "correct_loss_per_char": 0.5551140308380127, "incorrect_loss_per_char": 0.7782976031303406, "correct_loss_per_token": 1.1102280616760254, "incorrect_loss_per_token": 1.5565952062606812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1102280616760254, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.1102280616760254, "logits_per_char": -0.5551140308380127, "num_chars": 2}, {"sum_logits": -1.1720950603485107, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1720950603485107, "logits_per_char": -0.5860475301742554, "num_chars": 2}, {"sum_logits": -1.8531818389892578, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.8531818389892578, "logits_per_char": -0.9265909194946289, "num_chars": 2}, {"sum_logits": -1.644508719444275, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.644508719444275, "logits_per_char": -0.8222543597221375, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 852, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1260955333709717, "incorrect_loss_raw": 1.5544233719507854, "correct_loss_per_char": 0.5630477666854858, "incorrect_loss_per_char": 0.7772116859753927, "correct_loss_per_token": 1.1260955333709717, "incorrect_loss_per_token": 1.5544233719507854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1260955333709717, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.1260955333709717, "logits_per_char": -0.5630477666854858, "num_chars": 2}, {"sum_logits": -1.1610488891601562, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.1610488891601562, "logits_per_char": -0.5805244445800781, "num_chars": 2}, {"sum_logits": -1.9581108093261719, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.9581108093261719, "logits_per_char": -0.9790554046630859, "num_chars": 2}, {"sum_logits": -1.5441104173660278, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5441104173660278, "logits_per_char": -0.7720552086830139, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 853, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1750420331954956, "incorrect_loss_raw": 1.5475303332010906, "correct_loss_per_char": 0.5875210165977478, "incorrect_loss_per_char": 0.7737651666005453, "correct_loss_per_token": 1.1750420331954956, "incorrect_loss_per_token": 1.5475303332010906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1012630462646484, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.1012630462646484, "logits_per_char": -0.5506315231323242, "num_chars": 2}, {"sum_logits": -1.1750420331954956, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.1750420331954956, "logits_per_char": -0.5875210165977478, "num_chars": 2}, {"sum_logits": -2.0049688816070557, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -2.0049688816070557, "logits_per_char": -1.0024844408035278, "num_chars": 2}, {"sum_logits": -1.5363590717315674, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.5363590717315674, "logits_per_char": -0.7681795358657837, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 854, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0452306270599365, "incorrect_loss_raw": 1.6021690368652344, "correct_loss_per_char": 0.5226153135299683, "incorrect_loss_per_char": 0.8010845184326172, "correct_loss_per_token": 1.0452306270599365, "incorrect_loss_per_token": 1.6021690368652344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0452306270599365, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0452306270599365, "logits_per_char": -0.5226153135299683, "num_chars": 2}, {"sum_logits": -1.4702918529510498, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.4702918529510498, "logits_per_char": -0.7351459264755249, "num_chars": 2}, {"sum_logits": -1.9459302425384521, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.9459302425384521, "logits_per_char": -0.9729651212692261, "num_chars": 2}, {"sum_logits": -1.3902850151062012, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.3902850151062012, "logits_per_char": -0.6951425075531006, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 855, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8449468612670898, "incorrect_loss_raw": 1.296231985092163, "correct_loss_per_char": 0.9224734306335449, "incorrect_loss_per_char": 0.6481159925460815, "correct_loss_per_token": 1.8449468612670898, "incorrect_loss_per_token": 1.296231985092163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1791105270385742, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1791105270385742, "logits_per_char": -0.5895552635192871, "num_chars": 2}, {"sum_logits": -1.163449764251709, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.163449764251709, "logits_per_char": -0.5817248821258545, "num_chars": 2}, {"sum_logits": -1.8449468612670898, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.8449468612670898, "logits_per_char": -0.9224734306335449, "num_chars": 2}, {"sum_logits": -1.546135663986206, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.546135663986206, "logits_per_char": -0.773067831993103, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 856, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.602295994758606, "incorrect_loss_raw": 1.3503442605336506, "correct_loss_per_char": 0.801147997379303, "incorrect_loss_per_char": 0.6751721302668253, "correct_loss_per_token": 1.602295994758606, "incorrect_loss_per_token": 1.3503442605336506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2404357194900513, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.2404357194900513, "logits_per_char": -0.6202178597450256, "num_chars": 2}, {"sum_logits": -1.1837975978851318, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -1.1837975978851318, "logits_per_char": -0.5918987989425659, "num_chars": 2}, {"sum_logits": -1.626799464225769, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.626799464225769, "logits_per_char": -0.8133997321128845, "num_chars": 2}, {"sum_logits": -1.602295994758606, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.602295994758606, "logits_per_char": -0.801147997379303, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 857, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4384119510650635, "incorrect_loss_raw": 1.4009198347727458, "correct_loss_per_char": 0.7192059755325317, "incorrect_loss_per_char": 0.7004599173863729, "correct_loss_per_token": 1.4384119510650635, "incorrect_loss_per_token": 1.4009198347727458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2473289966583252, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -1.2473289966583252, "logits_per_char": -0.6236644983291626, "num_chars": 2}, {"sum_logits": -1.258074164390564, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.258074164390564, "logits_per_char": -0.629037082195282, "num_chars": 2}, {"sum_logits": -1.6973563432693481, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6973563432693481, "logits_per_char": -0.8486781716346741, "num_chars": 2}, {"sum_logits": -1.4384119510650635, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.4384119510650635, "logits_per_char": -0.7192059755325317, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 858, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1761904954910278, "incorrect_loss_raw": 1.5133617719014485, "correct_loss_per_char": 0.5880952477455139, "incorrect_loss_per_char": 0.7566808859507242, "correct_loss_per_token": 1.1761904954910278, "incorrect_loss_per_token": 1.5133617719014485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2365795373916626, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2365795373916626, "logits_per_char": -0.6182897686958313, "num_chars": 2}, {"sum_logits": -1.1761904954910278, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.1761904954910278, "logits_per_char": -0.5880952477455139, "num_chars": 2}, {"sum_logits": -1.8753937482833862, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.8753937482833862, "logits_per_char": -0.9376968741416931, "num_chars": 2}, {"sum_logits": -1.4281120300292969, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.4281120300292969, "logits_per_char": -0.7140560150146484, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 859, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.960997462272644, "incorrect_loss_raw": 1.675616979598999, "correct_loss_per_char": 0.480498731136322, "incorrect_loss_per_char": 0.8378084897994995, "correct_loss_per_token": 0.960997462272644, "incorrect_loss_per_token": 1.675616979598999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.960997462272644, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": true, "logits_per_token": -0.960997462272644, "logits_per_char": -0.480498731136322, "num_chars": 2}, {"sum_logits": -1.187591314315796, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.187591314315796, "logits_per_char": -0.593795657157898, "num_chars": 2}, {"sum_logits": -2.170172691345215, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -2.170172691345215, "logits_per_char": -1.0850863456726074, "num_chars": 2}, {"sum_logits": -1.6690869331359863, "num_tokens": 1, "num_tokens_all": 1116, "is_greedy": false, "logits_per_token": -1.6690869331359863, "logits_per_char": -0.8345434665679932, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 860, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5074056386947632, "incorrect_loss_raw": 1.3875600496927898, "correct_loss_per_char": 0.7537028193473816, "incorrect_loss_per_char": 0.6937800248463949, "correct_loss_per_token": 1.5074056386947632, "incorrect_loss_per_token": 1.3875600496927898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0742857456207275, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -1.0742857456207275, "logits_per_char": -0.5371428728103638, "num_chars": 2}, {"sum_logits": -1.4383951425552368, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.4383951425552368, "logits_per_char": -0.7191975712776184, "num_chars": 2}, {"sum_logits": -1.6499992609024048, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.6499992609024048, "logits_per_char": -0.8249996304512024, "num_chars": 2}, {"sum_logits": -1.5074056386947632, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5074056386947632, "logits_per_char": -0.7537028193473816, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 861, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.272963523864746, "incorrect_loss_raw": 1.4643619060516357, "correct_loss_per_char": 0.636481761932373, "incorrect_loss_per_char": 0.7321809530258179, "correct_loss_per_token": 1.272963523864746, "incorrect_loss_per_token": 1.4643619060516357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272963523864746, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.272963523864746, "logits_per_char": -0.636481761932373, "num_chars": 2}, {"sum_logits": -1.1771372556686401, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -1.1771372556686401, "logits_per_char": -0.5885686278343201, "num_chars": 2}, {"sum_logits": -1.7231199741363525, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.7231199741363525, "logits_per_char": -0.8615599870681763, "num_chars": 2}, {"sum_logits": -1.4928284883499146, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.4928284883499146, "logits_per_char": -0.7464142441749573, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 862, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2011629343032837, "incorrect_loss_raw": 1.5152254899342854, "correct_loss_per_char": 0.6005814671516418, "incorrect_loss_per_char": 0.7576127449671427, "correct_loss_per_token": 1.2011629343032837, "incorrect_loss_per_token": 1.5152254899342854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1275666952133179, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.1275666952133179, "logits_per_char": -0.5637833476066589, "num_chars": 2}, {"sum_logits": -1.2011629343032837, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2011629343032837, "logits_per_char": -0.6005814671516418, "num_chars": 2}, {"sum_logits": -1.878294587135315, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.878294587135315, "logits_per_char": -0.9391472935676575, "num_chars": 2}, {"sum_logits": -1.5398151874542236, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.5398151874542236, "logits_per_char": -0.7699075937271118, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 863, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4598520994186401, "incorrect_loss_raw": 1.4214963515599568, "correct_loss_per_char": 0.7299260497093201, "incorrect_loss_per_char": 0.7107481757799784, "correct_loss_per_token": 1.4598520994186401, "incorrect_loss_per_token": 1.4214963515599568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1216683387756348, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.1216683387756348, "logits_per_char": -0.5608341693878174, "num_chars": 2}, {"sum_logits": -1.2915666103363037, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2915666103363037, "logits_per_char": -0.6457833051681519, "num_chars": 2}, {"sum_logits": -1.8512541055679321, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.8512541055679321, "logits_per_char": -0.9256270527839661, "num_chars": 2}, {"sum_logits": -1.4598520994186401, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.4598520994186401, "logits_per_char": -0.7299260497093201, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 864, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8956958055496216, "incorrect_loss_raw": 1.2877133289972942, "correct_loss_per_char": 0.9478479027748108, "incorrect_loss_per_char": 0.6438566644986471, "correct_loss_per_token": 1.8956958055496216, "incorrect_loss_per_token": 1.2877133289972942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1400935649871826, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.1400935649871826, "logits_per_char": -0.5700467824935913, "num_chars": 2}, {"sum_logits": -1.1927495002746582, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.1927495002746582, "logits_per_char": -0.5963747501373291, "num_chars": 2}, {"sum_logits": -1.8956958055496216, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.8956958055496216, "logits_per_char": -0.9478479027748108, "num_chars": 2}, {"sum_logits": -1.5302969217300415, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.5302969217300415, "logits_per_char": -0.7651484608650208, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 865, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0223054885864258, "incorrect_loss_raw": 1.565560261408488, "correct_loss_per_char": 0.5111527442932129, "incorrect_loss_per_char": 0.782780130704244, "correct_loss_per_token": 1.0223054885864258, "incorrect_loss_per_token": 1.565560261408488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0223054885864258, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.0223054885864258, "logits_per_char": -0.5111527442932129, "num_chars": 2}, {"sum_logits": -1.4004489183425903, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4004489183425903, "logits_per_char": -0.7002244591712952, "num_chars": 2}, {"sum_logits": -1.7117387056350708, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.7117387056350708, "logits_per_char": -0.8558693528175354, "num_chars": 2}, {"sum_logits": -1.5844931602478027, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.5844931602478027, "logits_per_char": -0.7922465801239014, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 866, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0867993831634521, "incorrect_loss_raw": 1.568367640177409, "correct_loss_per_char": 0.5433996915817261, "incorrect_loss_per_char": 0.7841838200887045, "correct_loss_per_token": 1.0867993831634521, "incorrect_loss_per_token": 1.568367640177409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0867993831634521, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -1.0867993831634521, "logits_per_char": -0.5433996915817261, "num_chars": 2}, {"sum_logits": -1.2171368598937988, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2171368598937988, "logits_per_char": -0.6085684299468994, "num_chars": 2}, {"sum_logits": -1.963881015777588, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.963881015777588, "logits_per_char": -0.981940507888794, "num_chars": 2}, {"sum_logits": -1.5240850448608398, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.5240850448608398, "logits_per_char": -0.7620425224304199, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 867, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2216055393218994, "incorrect_loss_raw": 1.5375022490819295, "correct_loss_per_char": 0.6108027696609497, "incorrect_loss_per_char": 0.7687511245409647, "correct_loss_per_token": 1.2216055393218994, "incorrect_loss_per_token": 1.5375022490819295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0428173542022705, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.0428173542022705, "logits_per_char": -0.5214086771011353, "num_chars": 2}, {"sum_logits": -1.2216055393218994, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.2216055393218994, "logits_per_char": -0.6108027696609497, "num_chars": 2}, {"sum_logits": -2.012533664703369, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -2.012533664703369, "logits_per_char": -1.0062668323516846, "num_chars": 2}, {"sum_logits": -1.557155728340149, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.557155728340149, "logits_per_char": -0.7785778641700745, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 868, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3990498781204224, "incorrect_loss_raw": 1.409281055132548, "correct_loss_per_char": 0.6995249390602112, "incorrect_loss_per_char": 0.704640527566274, "correct_loss_per_token": 1.3990498781204224, "incorrect_loss_per_token": 1.409281055132548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1796541213989258, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1796541213989258, "logits_per_char": -0.5898270606994629, "num_chars": 2}, {"sum_logits": -1.4047646522521973, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4047646522521973, "logits_per_char": -0.7023823261260986, "num_chars": 2}, {"sum_logits": -1.643424391746521, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.643424391746521, "logits_per_char": -0.8217121958732605, "num_chars": 2}, {"sum_logits": -1.3990498781204224, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3990498781204224, "logits_per_char": -0.6995249390602112, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 869, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3799737691879272, "incorrect_loss_raw": 1.4283148845036824, "correct_loss_per_char": 0.6899868845939636, "incorrect_loss_per_char": 0.7141574422518412, "correct_loss_per_token": 1.3799737691879272, "incorrect_loss_per_token": 1.4283148845036824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1444214582443237, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.1444214582443237, "logits_per_char": -0.5722107291221619, "num_chars": 2}, {"sum_logits": -1.3803929090499878, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3803929090499878, "logits_per_char": -0.6901964545249939, "num_chars": 2}, {"sum_logits": -1.7601302862167358, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.7601302862167358, "logits_per_char": -0.8800651431083679, "num_chars": 2}, {"sum_logits": -1.3799737691879272, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3799737691879272, "logits_per_char": -0.6899868845939636, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 870, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3766398429870605, "incorrect_loss_raw": 1.4186160961786907, "correct_loss_per_char": 0.6883199214935303, "incorrect_loss_per_char": 0.7093080480893453, "correct_loss_per_token": 1.3766398429870605, "incorrect_loss_per_token": 1.4186160961786907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1946314573287964, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -1.1946314573287964, "logits_per_char": -0.5973157286643982, "num_chars": 2}, {"sum_logits": -1.3882801532745361, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.3882801532745361, "logits_per_char": -0.6941400766372681, "num_chars": 2}, {"sum_logits": -1.6729366779327393, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.6729366779327393, "logits_per_char": -0.8364683389663696, "num_chars": 2}, {"sum_logits": -1.3766398429870605, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.3766398429870605, "logits_per_char": -0.6883199214935303, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 871, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.929458737373352, "incorrect_loss_raw": 1.6354783773422241, "correct_loss_per_char": 0.464729368686676, "incorrect_loss_per_char": 0.8177391886711121, "correct_loss_per_token": 0.929458737373352, "incorrect_loss_per_token": 1.6354783773422241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.929458737373352, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -0.929458737373352, "logits_per_char": -0.464729368686676, "num_chars": 2}, {"sum_logits": -1.3691142797470093, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.3691142797470093, "logits_per_char": -0.6845571398735046, "num_chars": 2}, {"sum_logits": -1.7998179197311401, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.7998179197311401, "logits_per_char": -0.8999089598655701, "num_chars": 2}, {"sum_logits": -1.737502932548523, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.737502932548523, "logits_per_char": -0.8687514662742615, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 872, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1343146562576294, "incorrect_loss_raw": 1.5282329718271892, "correct_loss_per_char": 0.5671573281288147, "incorrect_loss_per_char": 0.7641164859135946, "correct_loss_per_token": 1.1343146562576294, "incorrect_loss_per_token": 1.5282329718271892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1343146562576294, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.1343146562576294, "logits_per_char": -0.5671573281288147, "num_chars": 2}, {"sum_logits": -1.2691020965576172, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.2691020965576172, "logits_per_char": -0.6345510482788086, "num_chars": 2}, {"sum_logits": -1.8505287170410156, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.8505287170410156, "logits_per_char": -0.9252643585205078, "num_chars": 2}, {"sum_logits": -1.4650681018829346, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.4650681018829346, "logits_per_char": -0.7325340509414673, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 873, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5416775941848755, "incorrect_loss_raw": 1.3890226284662883, "correct_loss_per_char": 0.7708387970924377, "incorrect_loss_per_char": 0.6945113142331442, "correct_loss_per_token": 1.5416775941848755, "incorrect_loss_per_token": 1.3890226284662883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1383492946624756, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -1.1383492946624756, "logits_per_char": -0.5691746473312378, "num_chars": 2}, {"sum_logits": -1.2347733974456787, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.2347733974456787, "logits_per_char": -0.6173866987228394, "num_chars": 2}, {"sum_logits": -1.7939451932907104, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.7939451932907104, "logits_per_char": -0.8969725966453552, "num_chars": 2}, {"sum_logits": -1.5416775941848755, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.5416775941848755, "logits_per_char": -0.7708387970924377, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 874, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1452531814575195, "incorrect_loss_raw": 1.4998538494110107, "correct_loss_per_char": 0.5726265907287598, "incorrect_loss_per_char": 0.7499269247055054, "correct_loss_per_token": 1.1452531814575195, "incorrect_loss_per_token": 1.4998538494110107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1452531814575195, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -1.1452531814575195, "logits_per_char": -0.5726265907287598, "num_chars": 2}, {"sum_logits": -1.3900201320648193, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.3900201320648193, "logits_per_char": -0.6950100660324097, "num_chars": 2}, {"sum_logits": -1.6298491954803467, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.6298491954803467, "logits_per_char": -0.8149245977401733, "num_chars": 2}, {"sum_logits": -1.4796922206878662, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.4796922206878662, "logits_per_char": -0.7398461103439331, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 875, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.987797737121582, "incorrect_loss_raw": 1.2914463480313618, "correct_loss_per_char": 0.993898868560791, "incorrect_loss_per_char": 0.6457231740156809, "correct_loss_per_token": 1.987797737121582, "incorrect_loss_per_token": 1.2914463480313618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9989786744117737, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -0.9989786744117737, "logits_per_char": -0.49948933720588684, "num_chars": 2}, {"sum_logits": -1.2380495071411133, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.2380495071411133, "logits_per_char": -0.6190247535705566, "num_chars": 2}, {"sum_logits": -1.987797737121582, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.987797737121582, "logits_per_char": -0.993898868560791, "num_chars": 2}, {"sum_logits": -1.6373108625411987, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.6373108625411987, "logits_per_char": -0.8186554312705994, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 876, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.977103352546692, "incorrect_loss_raw": 1.294378399848938, "correct_loss_per_char": 0.988551676273346, "incorrect_loss_per_char": 0.647189199924469, "correct_loss_per_token": 1.977103352546692, "incorrect_loss_per_token": 1.294378399848938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0500046014785767, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": true, "logits_per_token": -1.0500046014785767, "logits_per_char": -0.5250023007392883, "num_chars": 2}, {"sum_logits": -1.1319996118545532, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.1319996118545532, "logits_per_char": -0.5659998059272766, "num_chars": 2}, {"sum_logits": -1.977103352546692, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.977103352546692, "logits_per_char": -0.988551676273346, "num_chars": 2}, {"sum_logits": -1.701130986213684, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.701130986213684, "logits_per_char": -0.850565493106842, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 877, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.592808485031128, "incorrect_loss_raw": 1.4198929866154988, "correct_loss_per_char": 0.796404242515564, "incorrect_loss_per_char": 0.7099464933077494, "correct_loss_per_token": 1.592808485031128, "incorrect_loss_per_token": 1.4198929866154988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9998210668563843, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -0.9998210668563843, "logits_per_char": -0.49991053342819214, "num_chars": 2}, {"sum_logits": -1.2505004405975342, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.2505004405975342, "logits_per_char": -0.6252502202987671, "num_chars": 2}, {"sum_logits": -2.009357452392578, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -2.009357452392578, "logits_per_char": -1.004678726196289, "num_chars": 2}, {"sum_logits": -1.592808485031128, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.592808485031128, "logits_per_char": -0.796404242515564, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 878, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5224343538284302, "incorrect_loss_raw": 1.402726411819458, "correct_loss_per_char": 0.7612171769142151, "incorrect_loss_per_char": 0.701363205909729, "correct_loss_per_token": 1.5224343538284302, "incorrect_loss_per_token": 1.402726411819458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1400566101074219, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": true, "logits_per_token": -1.1400566101074219, "logits_per_char": -0.5700283050537109, "num_chars": 2}, {"sum_logits": -1.2024168968200684, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.2024168968200684, "logits_per_char": -0.6012084484100342, "num_chars": 2}, {"sum_logits": -1.8657057285308838, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.8657057285308838, "logits_per_char": -0.9328528642654419, "num_chars": 2}, {"sum_logits": -1.5224343538284302, "num_tokens": 1, "num_tokens_all": 1083, "is_greedy": false, "logits_per_token": -1.5224343538284302, "logits_per_char": -0.7612171769142151, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 879, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.504746675491333, "incorrect_loss_raw": 1.3835020065307617, "correct_loss_per_char": 0.7523733377456665, "incorrect_loss_per_char": 0.6917510032653809, "correct_loss_per_token": 1.504746675491333, "incorrect_loss_per_token": 1.3835020065307617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0959694385528564, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -1.0959694385528564, "logits_per_char": -0.5479847192764282, "num_chars": 2}, {"sum_logits": -1.4175384044647217, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.4175384044647217, "logits_per_char": -0.7087692022323608, "num_chars": 2}, {"sum_logits": -1.636998176574707, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.636998176574707, "logits_per_char": -0.8184990882873535, "num_chars": 2}, {"sum_logits": -1.504746675491333, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.504746675491333, "logits_per_char": -0.7523733377456665, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 880, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.188989520072937, "incorrect_loss_raw": 1.5258800586064656, "correct_loss_per_char": 0.5944947600364685, "incorrect_loss_per_char": 0.7629400293032328, "correct_loss_per_token": 1.188989520072937, "incorrect_loss_per_token": 1.5258800586064656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1163415908813477, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.1163415908813477, "logits_per_char": -0.5581707954406738, "num_chars": 2}, {"sum_logits": -1.188989520072937, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.188989520072937, "logits_per_char": -0.5944947600364685, "num_chars": 2}, {"sum_logits": -1.8922573328018188, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.8922573328018188, "logits_per_char": -0.9461286664009094, "num_chars": 2}, {"sum_logits": -1.5690412521362305, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.5690412521362305, "logits_per_char": -0.7845206260681152, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 881, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9142886400222778, "incorrect_loss_raw": 1.2840654055277507, "correct_loss_per_char": 0.9571443200111389, "incorrect_loss_per_char": 0.6420327027638754, "correct_loss_per_token": 1.9142886400222778, "incorrect_loss_per_token": 1.2840654055277507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1199790239334106, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.1199790239334106, "logits_per_char": -0.5599895119667053, "num_chars": 2}, {"sum_logits": -1.1965043544769287, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1965043544769287, "logits_per_char": -0.5982521772384644, "num_chars": 2}, {"sum_logits": -1.9142886400222778, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9142886400222778, "logits_per_char": -0.9571443200111389, "num_chars": 2}, {"sum_logits": -1.5357128381729126, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.5357128381729126, "logits_per_char": -0.7678564190864563, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 882, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9128941297531128, "incorrect_loss_raw": 1.2908854087193806, "correct_loss_per_char": 0.9564470648765564, "incorrect_loss_per_char": 0.6454427043596903, "correct_loss_per_token": 1.9128941297531128, "incorrect_loss_per_token": 1.2908854087193806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0831142663955688, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0831142663955688, "logits_per_char": -0.5415571331977844, "num_chars": 2}, {"sum_logits": -1.190012812614441, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.190012812614441, "logits_per_char": -0.5950064063072205, "num_chars": 2}, {"sum_logits": -1.9128941297531128, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.9128941297531128, "logits_per_char": -0.9564470648765564, "num_chars": 2}, {"sum_logits": -1.5995291471481323, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.5995291471481323, "logits_per_char": -0.7997645735740662, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 883, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1500816345214844, "incorrect_loss_raw": 1.527524709701538, "correct_loss_per_char": 0.5750408172607422, "incorrect_loss_per_char": 0.763762354850769, "correct_loss_per_token": 1.1500816345214844, "incorrect_loss_per_token": 1.527524709701538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1500816345214844, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -1.1500816345214844, "logits_per_char": -0.5750408172607422, "num_chars": 2}, {"sum_logits": -1.170422911643982, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.170422911643982, "logits_per_char": -0.585211455821991, "num_chars": 2}, {"sum_logits": -1.817755937576294, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.817755937576294, "logits_per_char": -0.908877968788147, "num_chars": 2}, {"sum_logits": -1.5943952798843384, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.5943952798843384, "logits_per_char": -0.7971976399421692, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 884, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8687524795532227, "incorrect_loss_raw": 1.3069345951080322, "correct_loss_per_char": 0.9343762397766113, "incorrect_loss_per_char": 0.6534672975540161, "correct_loss_per_token": 1.8687524795532227, "incorrect_loss_per_token": 1.3069345951080322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0752291679382324, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.0752291679382324, "logits_per_char": -0.5376145839691162, "num_chars": 2}, {"sum_logits": -1.1868922710418701, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.1868922710418701, "logits_per_char": -0.5934461355209351, "num_chars": 2}, {"sum_logits": -1.8687524795532227, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.8687524795532227, "logits_per_char": -0.9343762397766113, "num_chars": 2}, {"sum_logits": -1.6586823463439941, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.6586823463439941, "logits_per_char": -0.8293411731719971, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 885, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4848427772521973, "incorrect_loss_raw": 1.4057294925053914, "correct_loss_per_char": 0.7424213886260986, "incorrect_loss_per_char": 0.7028647462526957, "correct_loss_per_token": 1.4848427772521973, "incorrect_loss_per_token": 1.4057294925053914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0577729940414429, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -1.0577729940414429, "logits_per_char": -0.5288864970207214, "num_chars": 2}, {"sum_logits": -1.3981456756591797, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.3981456756591797, "logits_per_char": -0.6990728378295898, "num_chars": 2}, {"sum_logits": -1.7612698078155518, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.7612698078155518, "logits_per_char": -0.8806349039077759, "num_chars": 2}, {"sum_logits": -1.4848427772521973, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4848427772521973, "logits_per_char": -0.7424213886260986, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 886, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4351749420166016, "incorrect_loss_raw": 1.4125620524088542, "correct_loss_per_char": 0.7175874710083008, "incorrect_loss_per_char": 0.7062810262044271, "correct_loss_per_token": 1.4351749420166016, "incorrect_loss_per_token": 1.4125620524088542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0957714319229126, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -1.0957714319229126, "logits_per_char": -0.5478857159614563, "num_chars": 2}, {"sum_logits": -1.4121068716049194, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.4121068716049194, "logits_per_char": -0.7060534358024597, "num_chars": 2}, {"sum_logits": -1.7298078536987305, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.7298078536987305, "logits_per_char": -0.8649039268493652, "num_chars": 2}, {"sum_logits": -1.4351749420166016, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.4351749420166016, "logits_per_char": -0.7175874710083008, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 887, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6518080234527588, "incorrect_loss_raw": 1.3553802967071533, "correct_loss_per_char": 0.8259040117263794, "incorrect_loss_per_char": 0.6776901483535767, "correct_loss_per_token": 1.6518080234527588, "incorrect_loss_per_token": 1.3553802967071533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1633113622665405, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -1.1633113622665405, "logits_per_char": -0.5816556811332703, "num_chars": 2}, {"sum_logits": -1.1705245971679688, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.1705245971679688, "logits_per_char": -0.5852622985839844, "num_chars": 2}, {"sum_logits": -1.7323049306869507, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.7323049306869507, "logits_per_char": -0.8661524653434753, "num_chars": 2}, {"sum_logits": -1.6518080234527588, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.6518080234527588, "logits_per_char": -0.8259040117263794, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 888, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4289573431015015, "incorrect_loss_raw": 1.4189786116282146, "correct_loss_per_char": 0.7144786715507507, "incorrect_loss_per_char": 0.7094893058141073, "correct_loss_per_token": 1.4289573431015015, "incorrect_loss_per_token": 1.4189786116282146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1409616470336914, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -1.1409616470336914, "logits_per_char": -0.5704808235168457, "num_chars": 2}, {"sum_logits": -1.3348814249038696, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.3348814249038696, "logits_per_char": -0.6674407124519348, "num_chars": 2}, {"sum_logits": -1.7810927629470825, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.7810927629470825, "logits_per_char": -0.8905463814735413, "num_chars": 2}, {"sum_logits": -1.4289573431015015, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4289573431015015, "logits_per_char": -0.7144786715507507, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 889, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4359711408615112, "incorrect_loss_raw": 1.4124832153320312, "correct_loss_per_char": 0.7179855704307556, "incorrect_loss_per_char": 0.7062416076660156, "correct_loss_per_token": 1.4359711408615112, "incorrect_loss_per_token": 1.4124832153320312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.080389142036438, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.080389142036438, "logits_per_char": -0.540194571018219, "num_chars": 2}, {"sum_logits": -1.4359711408615112, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4359711408615112, "logits_per_char": -0.7179855704307556, "num_chars": 2}, {"sum_logits": -1.707724690437317, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.707724690437317, "logits_per_char": -0.8538623452186584, "num_chars": 2}, {"sum_logits": -1.4493358135223389, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4493358135223389, "logits_per_char": -0.7246679067611694, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 890, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3119874000549316, "incorrect_loss_raw": 1.4402689933776855, "correct_loss_per_char": 0.6559937000274658, "incorrect_loss_per_char": 0.7201344966888428, "correct_loss_per_token": 1.3119874000549316, "incorrect_loss_per_token": 1.4402689933776855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2405747175216675, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -1.2405747175216675, "logits_per_char": -0.6202873587608337, "num_chars": 2}, {"sum_logits": -1.3119874000549316, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.3119874000549316, "logits_per_char": -0.6559937000274658, "num_chars": 2}, {"sum_logits": -1.6764519214630127, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6764519214630127, "logits_per_char": -0.8382259607315063, "num_chars": 2}, {"sum_logits": -1.4037803411483765, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.4037803411483765, "logits_per_char": -0.7018901705741882, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 891, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6802211999893188, "incorrect_loss_raw": 1.318192958831787, "correct_loss_per_char": 0.8401105999946594, "incorrect_loss_per_char": 0.6590964794158936, "correct_loss_per_token": 1.6802211999893188, "incorrect_loss_per_token": 1.318192958831787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.22551429271698, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -1.22551429271698, "logits_per_char": -0.61275714635849, "num_chars": 2}, {"sum_logits": -1.2725915908813477, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.2725915908813477, "logits_per_char": -0.6362957954406738, "num_chars": 2}, {"sum_logits": -1.6802211999893188, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.6802211999893188, "logits_per_char": -0.8401105999946594, "num_chars": 2}, {"sum_logits": -1.4564729928970337, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.4564729928970337, "logits_per_char": -0.7282364964485168, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 892, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7094300985336304, "incorrect_loss_raw": 1.3224633137385051, "correct_loss_per_char": 0.8547150492668152, "incorrect_loss_per_char": 0.6612316568692526, "correct_loss_per_token": 1.7094300985336304, "incorrect_loss_per_token": 1.3224633137385051, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.072788953781128, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.072788953781128, "logits_per_char": -0.536394476890564, "num_chars": 2}, {"sum_logits": -1.4211854934692383, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4211854934692383, "logits_per_char": -0.7105927467346191, "num_chars": 2}, {"sum_logits": -1.7094300985336304, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.7094300985336304, "logits_per_char": -0.8547150492668152, "num_chars": 2}, {"sum_logits": -1.473415493965149, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.473415493965149, "logits_per_char": -0.7367077469825745, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 893, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9009596109390259, "incorrect_loss_raw": 1.3265281120936077, "correct_loss_per_char": 0.9504798054695129, "incorrect_loss_per_char": 0.6632640560468038, "correct_loss_per_token": 1.9009596109390259, "incorrect_loss_per_token": 1.3265281120936077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0014381408691406, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.0014381408691406, "logits_per_char": -0.5007190704345703, "num_chars": 2}, {"sum_logits": -1.1752071380615234, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1752071380615234, "logits_per_char": -0.5876035690307617, "num_chars": 2}, {"sum_logits": -1.9009596109390259, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9009596109390259, "logits_per_char": -0.9504798054695129, "num_chars": 2}, {"sum_logits": -1.8029390573501587, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.8029390573501587, "logits_per_char": -0.9014695286750793, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 894, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.484169363975525, "incorrect_loss_raw": 1.4191958904266357, "correct_loss_per_char": 0.7420846819877625, "incorrect_loss_per_char": 0.7095979452133179, "correct_loss_per_token": 1.484169363975525, "incorrect_loss_per_token": 1.4191958904266357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2229351997375488, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2229351997375488, "logits_per_char": -0.6114675998687744, "num_chars": 2}, {"sum_logits": -1.129520297050476, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.129520297050476, "logits_per_char": -0.564760148525238, "num_chars": 2}, {"sum_logits": -1.9051321744918823, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.9051321744918823, "logits_per_char": -0.9525660872459412, "num_chars": 2}, {"sum_logits": -1.484169363975525, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.484169363975525, "logits_per_char": -0.7420846819877625, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 895, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2902557849884033, "incorrect_loss_raw": 1.4468812545140584, "correct_loss_per_char": 0.6451278924942017, "incorrect_loss_per_char": 0.7234406272570292, "correct_loss_per_token": 1.2902557849884033, "incorrect_loss_per_token": 1.4468812545140584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2657675743103027, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -1.2657675743103027, "logits_per_char": -0.6328837871551514, "num_chars": 2}, {"sum_logits": -1.2902557849884033, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.2902557849884033, "logits_per_char": -0.6451278924942017, "num_chars": 2}, {"sum_logits": -1.689630150794983, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.689630150794983, "logits_per_char": -0.8448150753974915, "num_chars": 2}, {"sum_logits": -1.3852460384368896, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.3852460384368896, "logits_per_char": -0.6926230192184448, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 896, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0125503540039062, "incorrect_loss_raw": 1.2801988919576008, "correct_loss_per_char": 1.0062751770019531, "incorrect_loss_per_char": 0.6400994459788004, "correct_loss_per_token": 2.0125503540039062, "incorrect_loss_per_token": 1.2801988919576008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0023808479309082, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.0023808479309082, "logits_per_char": -0.5011904239654541, "num_chars": 2}, {"sum_logits": -1.2536166906356812, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2536166906356812, "logits_per_char": -0.6268083453178406, "num_chars": 2}, {"sum_logits": -2.0125503540039062, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -2.0125503540039062, "logits_per_char": -1.0062751770019531, "num_chars": 2}, {"sum_logits": -1.5845991373062134, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.5845991373062134, "logits_per_char": -0.7922995686531067, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 897, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4692318439483643, "incorrect_loss_raw": 1.4005622069040935, "correct_loss_per_char": 0.7346159219741821, "incorrect_loss_per_char": 0.7002811034520467, "correct_loss_per_token": 1.4692318439483643, "incorrect_loss_per_token": 1.4005622069040935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1306930780410767, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -1.1306930780410767, "logits_per_char": -0.5653465390205383, "num_chars": 2}, {"sum_logits": -1.3243448734283447, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.3243448734283447, "logits_per_char": -0.6621724367141724, "num_chars": 2}, {"sum_logits": -1.7466486692428589, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.7466486692428589, "logits_per_char": -0.8733243346214294, "num_chars": 2}, {"sum_logits": -1.4692318439483643, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.4692318439483643, "logits_per_char": -0.7346159219741821, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 898, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.145440697669983, "incorrect_loss_raw": 1.5031057993570964, "correct_loss_per_char": 0.5727203488349915, "incorrect_loss_per_char": 0.7515528996785482, "correct_loss_per_token": 1.145440697669983, "incorrect_loss_per_token": 1.5031057993570964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3128150701522827, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.3128150701522827, "logits_per_char": -0.6564075350761414, "num_chars": 2}, {"sum_logits": -1.145440697669983, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -1.145440697669983, "logits_per_char": -0.5727203488349915, "num_chars": 2}, {"sum_logits": -1.6530306339263916, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.6530306339263916, "logits_per_char": -0.8265153169631958, "num_chars": 2}, {"sum_logits": -1.5434716939926147, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.5434716939926147, "logits_per_char": -0.7717358469963074, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 899, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4238238334655762, "incorrect_loss_raw": 1.4068760871887207, "correct_loss_per_char": 0.7119119167327881, "incorrect_loss_per_char": 0.7034380435943604, "correct_loss_per_token": 1.4238238334655762, "incorrect_loss_per_token": 1.4068760871887207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1802256107330322, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -1.1802256107330322, "logits_per_char": -0.5901128053665161, "num_chars": 2}, {"sum_logits": -1.3872392177581787, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.3872392177581787, "logits_per_char": -0.6936196088790894, "num_chars": 2}, {"sum_logits": -1.6531634330749512, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.6531634330749512, "logits_per_char": -0.8265817165374756, "num_chars": 2}, {"sum_logits": -1.4238238334655762, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.4238238334655762, "logits_per_char": -0.7119119167327881, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 900, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3789260387420654, "incorrect_loss_raw": 1.4467437664667766, "correct_loss_per_char": 0.6894630193710327, "incorrect_loss_per_char": 0.7233718832333883, "correct_loss_per_token": 1.3789260387420654, "incorrect_loss_per_token": 1.4467437664667766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0319019556045532, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -1.0319019556045532, "logits_per_char": -0.5159509778022766, "num_chars": 2}, {"sum_logits": -1.3789260387420654, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.3789260387420654, "logits_per_char": -0.6894630193710327, "num_chars": 2}, {"sum_logits": -1.7641726732254028, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.7641726732254028, "logits_per_char": -0.8820863366127014, "num_chars": 2}, {"sum_logits": -1.5441566705703735, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.5441566705703735, "logits_per_char": -0.7720783352851868, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 901, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1758952140808105, "incorrect_loss_raw": 1.5575517813364665, "correct_loss_per_char": 0.5879476070404053, "incorrect_loss_per_char": 0.7787758906682333, "correct_loss_per_token": 1.1758952140808105, "incorrect_loss_per_token": 1.5575517813364665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0451610088348389, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": true, "logits_per_token": -1.0451610088348389, "logits_per_char": -0.5225805044174194, "num_chars": 2}, {"sum_logits": -1.1758952140808105, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.1758952140808105, "logits_per_char": -0.5879476070404053, "num_chars": 2}, {"sum_logits": -1.9643902778625488, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.9643902778625488, "logits_per_char": -0.9821951389312744, "num_chars": 2}, {"sum_logits": -1.6631040573120117, "num_tokens": 1, "num_tokens_all": 1098, "is_greedy": false, "logits_per_token": -1.6631040573120117, "logits_per_char": -0.8315520286560059, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 902, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6271324157714844, "incorrect_loss_raw": 1.3781154155731201, "correct_loss_per_char": 0.8135662078857422, "incorrect_loss_per_char": 0.6890577077865601, "correct_loss_per_token": 1.6271324157714844, "incorrect_loss_per_token": 1.3781154155731201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0154614448547363, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": true, "logits_per_token": -1.0154614448547363, "logits_per_char": -0.5077307224273682, "num_chars": 2}, {"sum_logits": -1.397193193435669, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.397193193435669, "logits_per_char": -0.6985965967178345, "num_chars": 2}, {"sum_logits": -1.721691608428955, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.721691608428955, "logits_per_char": -0.8608458042144775, "num_chars": 2}, {"sum_logits": -1.6271324157714844, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.6271324157714844, "logits_per_char": -0.8135662078857422, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 903, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0398426055908203, "incorrect_loss_raw": 1.6004191239674885, "correct_loss_per_char": 0.5199213027954102, "incorrect_loss_per_char": 0.8002095619837443, "correct_loss_per_token": 1.0398426055908203, "incorrect_loss_per_token": 1.6004191239674885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0398426055908203, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -1.0398426055908203, "logits_per_char": -0.5199213027954102, "num_chars": 2}, {"sum_logits": -1.1921412944793701, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.1921412944793701, "logits_per_char": -0.5960706472396851, "num_chars": 2}, {"sum_logits": -1.993229627609253, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.993229627609253, "logits_per_char": -0.9966148138046265, "num_chars": 2}, {"sum_logits": -1.6158864498138428, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.6158864498138428, "logits_per_char": -0.8079432249069214, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 904, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1638095378875732, "incorrect_loss_raw": 1.6131741205851238, "correct_loss_per_char": 0.5819047689437866, "incorrect_loss_per_char": 0.8065870602925619, "correct_loss_per_token": 1.1638095378875732, "incorrect_loss_per_token": 1.6131741205851238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9445145130157471, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.9445145130157471, "logits_per_char": -0.47225725650787354, "num_chars": 2}, {"sum_logits": -1.1638095378875732, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.1638095378875732, "logits_per_char": -0.5819047689437866, "num_chars": 2}, {"sum_logits": -2.114135265350342, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -2.114135265350342, "logits_per_char": -1.057067632675171, "num_chars": 2}, {"sum_logits": -1.7808725833892822, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.7808725833892822, "logits_per_char": -0.8904362916946411, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 905, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9480162858963013, "incorrect_loss_raw": 1.6502877076466878, "correct_loss_per_char": 0.47400814294815063, "incorrect_loss_per_char": 0.8251438538233439, "correct_loss_per_token": 0.9480162858963013, "incorrect_loss_per_token": 1.6502877076466878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9480162858963013, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": true, "logits_per_token": -0.9480162858963013, "logits_per_char": -0.47400814294815063, "num_chars": 2}, {"sum_logits": -1.2456088066101074, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.2456088066101074, "logits_per_char": -0.6228044033050537, "num_chars": 2}, {"sum_logits": -1.9145402908325195, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.9145402908325195, "logits_per_char": -0.9572701454162598, "num_chars": 2}, {"sum_logits": -1.7907140254974365, "num_tokens": 1, "num_tokens_all": 1096, "is_greedy": false, "logits_per_token": -1.7907140254974365, "logits_per_char": -0.8953570127487183, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 906, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.997335433959961, "incorrect_loss_raw": 1.2879606088002522, "correct_loss_per_char": 0.9986677169799805, "incorrect_loss_per_char": 0.6439803044001261, "correct_loss_per_token": 1.997335433959961, "incorrect_loss_per_token": 1.2879606088002522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9976043701171875, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -0.9976043701171875, "logits_per_char": -0.49880218505859375, "num_chars": 2}, {"sum_logits": -1.2318123579025269, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.2318123579025269, "logits_per_char": -0.6159061789512634, "num_chars": 2}, {"sum_logits": -1.997335433959961, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.997335433959961, "logits_per_char": -0.9986677169799805, "num_chars": 2}, {"sum_logits": -1.6344650983810425, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.6344650983810425, "logits_per_char": -0.8172325491905212, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 907, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9832520484924316, "incorrect_loss_raw": 1.2793570359547932, "correct_loss_per_char": 0.9916260242462158, "incorrect_loss_per_char": 0.6396785179773966, "correct_loss_per_token": 1.9832520484924316, "incorrect_loss_per_token": 1.2793570359547932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1076096296310425, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.1076096296310425, "logits_per_char": -0.5538048148155212, "num_chars": 2}, {"sum_logits": -1.1408770084381104, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.1408770084381104, "logits_per_char": -0.5704385042190552, "num_chars": 2}, {"sum_logits": -1.9832520484924316, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.9832520484924316, "logits_per_char": -0.9916260242462158, "num_chars": 2}, {"sum_logits": -1.589584469795227, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.589584469795227, "logits_per_char": -0.7947922348976135, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 908, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2082533836364746, "incorrect_loss_raw": 1.4764273564020793, "correct_loss_per_char": 0.6041266918182373, "incorrect_loss_per_char": 0.7382136782010397, "correct_loss_per_token": 1.2082533836364746, "incorrect_loss_per_token": 1.4764273564020793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2082533836364746, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -1.2082533836364746, "logits_per_char": -0.6041266918182373, "num_chars": 2}, {"sum_logits": -1.3031327724456787, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.3031327724456787, "logits_per_char": -0.6515663862228394, "num_chars": 2}, {"sum_logits": -1.6256351470947266, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.6256351470947266, "logits_per_char": -0.8128175735473633, "num_chars": 2}, {"sum_logits": -1.5005141496658325, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5005141496658325, "logits_per_char": -0.7502570748329163, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 909, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.314210295677185, "incorrect_loss_raw": 1.4767487446467082, "correct_loss_per_char": 0.6571051478385925, "incorrect_loss_per_char": 0.7383743723233541, "correct_loss_per_token": 1.314210295677185, "incorrect_loss_per_token": 1.4767487446467082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.147643804550171, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -1.147643804550171, "logits_per_char": -0.5738219022750854, "num_chars": 2}, {"sum_logits": -1.341546893119812, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.341546893119812, "logits_per_char": -0.670773446559906, "num_chars": 2}, {"sum_logits": -1.9410555362701416, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.9410555362701416, "logits_per_char": -0.9705277681350708, "num_chars": 2}, {"sum_logits": -1.314210295677185, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.314210295677185, "logits_per_char": -0.6571051478385925, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 910, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0322167873382568, "incorrect_loss_raw": 1.557768185933431, "correct_loss_per_char": 0.5161083936691284, "incorrect_loss_per_char": 0.7788840929667155, "correct_loss_per_token": 1.0322167873382568, "incorrect_loss_per_token": 1.557768185933431, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0322167873382568, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -1.0322167873382568, "logits_per_char": -0.5161083936691284, "num_chars": 2}, {"sum_logits": -1.520616054534912, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.520616054534912, "logits_per_char": -0.760308027267456, "num_chars": 2}, {"sum_logits": -1.724172830581665, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.724172830581665, "logits_per_char": -0.8620864152908325, "num_chars": 2}, {"sum_logits": -1.4285156726837158, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4285156726837158, "logits_per_char": -0.7142578363418579, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 911, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1381959915161133, "incorrect_loss_raw": 1.5899738470713298, "correct_loss_per_char": 0.5690979957580566, "incorrect_loss_per_char": 0.7949869235356649, "correct_loss_per_token": 1.1381959915161133, "incorrect_loss_per_token": 1.5899738470713298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0068907737731934, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -1.0068907737731934, "logits_per_char": -0.5034453868865967, "num_chars": 2}, {"sum_logits": -1.1381959915161133, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.1381959915161133, "logits_per_char": -0.5690979957580566, "num_chars": 2}, {"sum_logits": -2.0122125148773193, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -2.0122125148773193, "logits_per_char": -1.0061062574386597, "num_chars": 2}, {"sum_logits": -1.7508182525634766, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.7508182525634766, "logits_per_char": -0.8754091262817383, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 912, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6852977275848389, "incorrect_loss_raw": 1.3821494579315186, "correct_loss_per_char": 0.8426488637924194, "incorrect_loss_per_char": 0.6910747289657593, "correct_loss_per_token": 1.6852977275848389, "incorrect_loss_per_token": 1.3821494579315186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0632106065750122, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.0632106065750122, "logits_per_char": -0.5316053032875061, "num_chars": 2}, {"sum_logits": -1.147443175315857, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.147443175315857, "logits_per_char": -0.5737215876579285, "num_chars": 2}, {"sum_logits": -1.9357945919036865, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.9357945919036865, "logits_per_char": -0.9678972959518433, "num_chars": 2}, {"sum_logits": -1.6852977275848389, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.6852977275848389, "logits_per_char": -0.8426488637924194, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 913, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8577065467834473, "incorrect_loss_raw": 1.3167999188105266, "correct_loss_per_char": 0.9288532733917236, "incorrect_loss_per_char": 0.6583999594052633, "correct_loss_per_token": 1.8577065467834473, "incorrect_loss_per_token": 1.3167999188105266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0287824869155884, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0287824869155884, "logits_per_char": -0.5143912434577942, "num_chars": 2}, {"sum_logits": -1.2212557792663574, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.2212557792663574, "logits_per_char": -0.6106278896331787, "num_chars": 2}, {"sum_logits": -1.8577065467834473, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.8577065467834473, "logits_per_char": -0.9288532733917236, "num_chars": 2}, {"sum_logits": -1.7003614902496338, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.7003614902496338, "logits_per_char": -0.8501807451248169, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 914, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.118194341659546, "incorrect_loss_raw": 1.5396597782770793, "correct_loss_per_char": 0.559097170829773, "incorrect_loss_per_char": 0.7698298891385397, "correct_loss_per_token": 1.118194341659546, "incorrect_loss_per_token": 1.5396597782770793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.118194341659546, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.118194341659546, "logits_per_char": -0.559097170829773, "num_chars": 2}, {"sum_logits": -1.230642557144165, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.230642557144165, "logits_per_char": -0.6153212785720825, "num_chars": 2}, {"sum_logits": -1.872767448425293, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.872767448425293, "logits_per_char": -0.9363837242126465, "num_chars": 2}, {"sum_logits": -1.5155693292617798, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.5155693292617798, "logits_per_char": -0.7577846646308899, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 915, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.913151741027832, "incorrect_loss_raw": 1.3113920291264851, "correct_loss_per_char": 0.956575870513916, "incorrect_loss_per_char": 0.6556960145632426, "correct_loss_per_token": 1.913151741027832, "incorrect_loss_per_token": 1.3113920291264851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0367895364761353, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.0367895364761353, "logits_per_char": -0.5183947682380676, "num_chars": 2}, {"sum_logits": -1.1666579246520996, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.1666579246520996, "logits_per_char": -0.5833289623260498, "num_chars": 2}, {"sum_logits": -1.913151741027832, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.913151741027832, "logits_per_char": -0.956575870513916, "num_chars": 2}, {"sum_logits": -1.7307286262512207, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.7307286262512207, "logits_per_char": -0.8653643131256104, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 916, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1421490907669067, "incorrect_loss_raw": 1.5353639523188274, "correct_loss_per_char": 0.5710745453834534, "incorrect_loss_per_char": 0.7676819761594137, "correct_loss_per_token": 1.1421490907669067, "incorrect_loss_per_token": 1.5353639523188274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2043864727020264, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.2043864727020264, "logits_per_char": -0.6021932363510132, "num_chars": 2}, {"sum_logits": -1.1421490907669067, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": true, "logits_per_token": -1.1421490907669067, "logits_per_char": -0.5710745453834534, "num_chars": 2}, {"sum_logits": -1.8968605995178223, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.8968605995178223, "logits_per_char": -0.9484302997589111, "num_chars": 2}, {"sum_logits": -1.5048447847366333, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.5048447847366333, "logits_per_char": -0.7524223923683167, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 917, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.302414059638977, "incorrect_loss_raw": 1.5568986535072327, "correct_loss_per_char": 0.6512070298194885, "incorrect_loss_per_char": 0.7784493267536163, "correct_loss_per_token": 1.302414059638977, "incorrect_loss_per_token": 1.5568986535072327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8733434081077576, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -0.8733434081077576, "logits_per_char": -0.4366717040538788, "num_chars": 2}, {"sum_logits": -1.302414059638977, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.302414059638977, "logits_per_char": -0.6512070298194885, "num_chars": 2}, {"sum_logits": -1.998791217803955, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.998791217803955, "logits_per_char": -0.9993956089019775, "num_chars": 2}, {"sum_logits": -1.7985613346099854, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.7985613346099854, "logits_per_char": -0.8992806673049927, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 918, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2822883129119873, "incorrect_loss_raw": 1.4657598336537678, "correct_loss_per_char": 0.6411441564559937, "incorrect_loss_per_char": 0.7328799168268839, "correct_loss_per_token": 1.2822883129119873, "incorrect_loss_per_token": 1.4657598336537678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2065558433532715, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.2065558433532715, "logits_per_char": -0.6032779216766357, "num_chars": 2}, {"sum_logits": -1.2822883129119873, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2822883129119873, "logits_per_char": -0.6411441564559937, "num_chars": 2}, {"sum_logits": -1.8160030841827393, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.8160030841827393, "logits_per_char": -0.9080015420913696, "num_chars": 2}, {"sum_logits": -1.374720573425293, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.374720573425293, "logits_per_char": -0.6873602867126465, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 919, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1789463758468628, "incorrect_loss_raw": 1.5585053364435832, "correct_loss_per_char": 0.5894731879234314, "incorrect_loss_per_char": 0.7792526682217916, "correct_loss_per_token": 1.1789463758468628, "incorrect_loss_per_token": 1.5585053364435832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0499608516693115, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": true, "logits_per_token": -1.0499608516693115, "logits_per_char": -0.5249804258346558, "num_chars": 2}, {"sum_logits": -1.1789463758468628, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.1789463758468628, "logits_per_char": -0.5894731879234314, "num_chars": 2}, {"sum_logits": -2.0417284965515137, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -2.0417284965515137, "logits_per_char": -1.0208642482757568, "num_chars": 2}, {"sum_logits": -1.5838266611099243, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.5838266611099243, "logits_per_char": -0.7919133305549622, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 920, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0233759880065918, "incorrect_loss_raw": 1.5655380090077717, "correct_loss_per_char": 0.5116879940032959, "incorrect_loss_per_char": 0.7827690045038859, "correct_loss_per_token": 1.0233759880065918, "incorrect_loss_per_token": 1.5655380090077717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0233759880065918, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -1.0233759880065918, "logits_per_char": -0.5116879940032959, "num_chars": 2}, {"sum_logits": -1.493692398071289, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.493692398071289, "logits_per_char": -0.7468461990356445, "num_chars": 2}, {"sum_logits": -1.7120769023895264, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.7120769023895264, "logits_per_char": -0.8560384511947632, "num_chars": 2}, {"sum_logits": -1.4908447265625, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.4908447265625, "logits_per_char": -0.74542236328125, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 921, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6678013801574707, "incorrect_loss_raw": 1.3995401461919148, "correct_loss_per_char": 0.8339006900787354, "incorrect_loss_per_char": 0.6997700730959574, "correct_loss_per_token": 1.6678013801574707, "incorrect_loss_per_token": 1.3995401461919148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0016016960144043, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.0016016960144043, "logits_per_char": -0.5008008480072021, "num_chars": 2}, {"sum_logits": -1.2148733139038086, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.2148733139038086, "logits_per_char": -0.6074366569519043, "num_chars": 2}, {"sum_logits": -1.9821454286575317, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.9821454286575317, "logits_per_char": -0.9910727143287659, "num_chars": 2}, {"sum_logits": -1.6678013801574707, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.6678013801574707, "logits_per_char": -0.8339006900787354, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 922, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8140919208526611, "incorrect_loss_raw": 1.766363302866618, "correct_loss_per_char": 0.40704596042633057, "incorrect_loss_per_char": 0.883181651433309, "correct_loss_per_token": 0.8140919208526611, "incorrect_loss_per_token": 1.766363302866618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8140919208526611, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": true, "logits_per_token": -0.8140919208526611, "logits_per_char": -0.40704596042633057, "num_chars": 2}, {"sum_logits": -1.3198153972625732, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.3198153972625732, "logits_per_char": -0.6599076986312866, "num_chars": 2}, {"sum_logits": -2.1432836055755615, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -2.1432836055755615, "logits_per_char": -1.0716418027877808, "num_chars": 2}, {"sum_logits": -1.8359909057617188, "num_tokens": 1, "num_tokens_all": 1124, "is_greedy": false, "logits_per_token": -1.8359909057617188, "logits_per_char": -0.9179954528808594, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 923, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.126905918121338, "incorrect_loss_raw": 1.5061003764470418, "correct_loss_per_char": 0.563452959060669, "incorrect_loss_per_char": 0.7530501882235209, "correct_loss_per_token": 1.126905918121338, "incorrect_loss_per_token": 1.5061003764470418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.126905918121338, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.126905918121338, "logits_per_char": -0.563452959060669, "num_chars": 2}, {"sum_logits": -1.444833517074585, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.444833517074585, "logits_per_char": -0.7224167585372925, "num_chars": 2}, {"sum_logits": -1.6481447219848633, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.6481447219848633, "logits_per_char": -0.8240723609924316, "num_chars": 2}, {"sum_logits": -1.4253228902816772, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4253228902816772, "logits_per_char": -0.7126614451408386, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 924, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2194814682006836, "incorrect_loss_raw": 1.476122260093689, "correct_loss_per_char": 0.6097407341003418, "incorrect_loss_per_char": 0.7380611300468445, "correct_loss_per_token": 1.2194814682006836, "incorrect_loss_per_token": 1.476122260093689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.265131950378418, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.265131950378418, "logits_per_char": -0.632565975189209, "num_chars": 2}, {"sum_logits": -1.2194814682006836, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -1.2194814682006836, "logits_per_char": -0.6097407341003418, "num_chars": 2}, {"sum_logits": -1.6839929819107056, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.6839929819107056, "logits_per_char": -0.8419964909553528, "num_chars": 2}, {"sum_logits": -1.4792418479919434, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.4792418479919434, "logits_per_char": -0.7396209239959717, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 925, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.225550651550293, "incorrect_loss_raw": 1.5118712186813354, "correct_loss_per_char": 0.6127753257751465, "incorrect_loss_per_char": 0.7559356093406677, "correct_loss_per_token": 1.225550651550293, "incorrect_loss_per_token": 1.5118712186813354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0768643617630005, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -1.0768643617630005, "logits_per_char": -0.5384321808815002, "num_chars": 2}, {"sum_logits": -1.225550651550293, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.225550651550293, "logits_per_char": -0.6127753257751465, "num_chars": 2}, {"sum_logits": -1.8177484273910522, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.8177484273910522, "logits_per_char": -0.9088742136955261, "num_chars": 2}, {"sum_logits": -1.6410008668899536, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.6410008668899536, "logits_per_char": -0.8205004334449768, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 926, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2329177856445312, "incorrect_loss_raw": 1.4848092397054036, "correct_loss_per_char": 0.6164588928222656, "incorrect_loss_per_char": 0.7424046198527018, "correct_loss_per_token": 1.2329177856445312, "incorrect_loss_per_token": 1.4848092397054036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.167426347732544, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": true, "logits_per_token": -1.167426347732544, "logits_per_char": -0.583713173866272, "num_chars": 2}, {"sum_logits": -1.2329177856445312, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.2329177856445312, "logits_per_char": -0.6164588928222656, "num_chars": 2}, {"sum_logits": -1.713293433189392, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.713293433189392, "logits_per_char": -0.856646716594696, "num_chars": 2}, {"sum_logits": -1.573707938194275, "num_tokens": 1, "num_tokens_all": 1064, "is_greedy": false, "logits_per_token": -1.573707938194275, "logits_per_char": -0.7868539690971375, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 927, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7132277488708496, "incorrect_loss_raw": 1.3780192136764526, "correct_loss_per_char": 0.8566138744354248, "incorrect_loss_per_char": 0.6890096068382263, "correct_loss_per_token": 1.7132277488708496, "incorrect_loss_per_token": 1.3780192136764526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0602954626083374, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": true, "logits_per_token": -1.0602954626083374, "logits_per_char": -0.5301477313041687, "num_chars": 2}, {"sum_logits": -1.1382956504821777, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.1382956504821777, "logits_per_char": -0.5691478252410889, "num_chars": 2}, {"sum_logits": -1.9354665279388428, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.9354665279388428, "logits_per_char": -0.9677332639694214, "num_chars": 2}, {"sum_logits": -1.7132277488708496, "num_tokens": 1, "num_tokens_all": 1079, "is_greedy": false, "logits_per_token": -1.7132277488708496, "logits_per_char": -0.8566138744354248, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 928, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.150100588798523, "incorrect_loss_raw": 1.545350710550944, "correct_loss_per_char": 0.5750502943992615, "incorrect_loss_per_char": 0.772675355275472, "correct_loss_per_token": 1.150100588798523, "incorrect_loss_per_token": 1.545350710550944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.150100588798523, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.150100588798523, "logits_per_char": -0.5750502943992615, "num_chars": 2}, {"sum_logits": -1.1272897720336914, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.1272897720336914, "logits_per_char": -0.5636448860168457, "num_chars": 2}, {"sum_logits": -1.9199353456497192, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.9199353456497192, "logits_per_char": -0.9599676728248596, "num_chars": 2}, {"sum_logits": -1.5888270139694214, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.5888270139694214, "logits_per_char": -0.7944135069847107, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 929, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.552016258239746, "incorrect_loss_raw": 1.3995202779769897, "correct_loss_per_char": 0.776008129119873, "incorrect_loss_per_char": 0.6997601389884949, "correct_loss_per_token": 1.552016258239746, "incorrect_loss_per_token": 1.3995202779769897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1916444301605225, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.1916444301605225, "logits_per_char": -0.5958222150802612, "num_chars": 2}, {"sum_logits": -1.1285480260849, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": true, "logits_per_token": -1.1285480260849, "logits_per_char": -0.56427401304245, "num_chars": 2}, {"sum_logits": -1.8783683776855469, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.8783683776855469, "logits_per_char": -0.9391841888427734, "num_chars": 2}, {"sum_logits": -1.552016258239746, "num_tokens": 1, "num_tokens_all": 1115, "is_greedy": false, "logits_per_token": -1.552016258239746, "logits_per_char": -0.776008129119873, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 930, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4433598518371582, "incorrect_loss_raw": 1.391445239384969, "correct_loss_per_char": 0.7216799259185791, "incorrect_loss_per_char": 0.6957226196924845, "correct_loss_per_token": 1.4433598518371582, "incorrect_loss_per_token": 1.391445239384969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2002136707305908, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -1.2002136707305908, "logits_per_char": -0.6001068353652954, "num_chars": 2}, {"sum_logits": -1.3613613843917847, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.3613613843917847, "logits_per_char": -0.6806806921958923, "num_chars": 2}, {"sum_logits": -1.6127606630325317, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.6127606630325317, "logits_per_char": -0.8063803315162659, "num_chars": 2}, {"sum_logits": -1.4433598518371582, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.4433598518371582, "logits_per_char": -0.7216799259185791, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 931, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1125357151031494, "incorrect_loss_raw": 1.5476444164911907, "correct_loss_per_char": 0.5562678575515747, "incorrect_loss_per_char": 0.7738222082455953, "correct_loss_per_token": 1.1125357151031494, "incorrect_loss_per_token": 1.5476444164911907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1125357151031494, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.1125357151031494, "logits_per_char": -0.5562678575515747, "num_chars": 2}, {"sum_logits": -1.2238847017288208, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.2238847017288208, "logits_per_char": -0.6119423508644104, "num_chars": 2}, {"sum_logits": -1.8953877687454224, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.8953877687454224, "logits_per_char": -0.9476938843727112, "num_chars": 2}, {"sum_logits": -1.5236607789993286, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.5236607789993286, "logits_per_char": -0.7618303894996643, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 932, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6912305355072021, "incorrect_loss_raw": 1.3450019359588623, "correct_loss_per_char": 0.8456152677536011, "incorrect_loss_per_char": 0.6725009679794312, "correct_loss_per_token": 1.6912305355072021, "incorrect_loss_per_token": 1.3450019359588623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0350842475891113, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.0350842475891113, "logits_per_char": -0.5175421237945557, "num_chars": 2}, {"sum_logits": -1.3782918453216553, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.3782918453216553, "logits_per_char": -0.6891459226608276, "num_chars": 2}, {"sum_logits": -1.6912305355072021, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6912305355072021, "logits_per_char": -0.8456152677536011, "num_chars": 2}, {"sum_logits": -1.6216297149658203, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6216297149658203, "logits_per_char": -0.8108148574829102, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 933, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1619699001312256, "incorrect_loss_raw": 1.5227874517440796, "correct_loss_per_char": 0.5809849500656128, "incorrect_loss_per_char": 0.7613937258720398, "correct_loss_per_token": 1.1619699001312256, "incorrect_loss_per_token": 1.5227874517440796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1755613088607788, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1755613088607788, "logits_per_char": -0.5877806544303894, "num_chars": 2}, {"sum_logits": -1.1619699001312256, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.1619699001312256, "logits_per_char": -0.5809849500656128, "num_chars": 2}, {"sum_logits": -1.836677074432373, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.836677074432373, "logits_per_char": -0.9183385372161865, "num_chars": 2}, {"sum_logits": -1.556123971939087, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.556123971939087, "logits_per_char": -0.7780619859695435, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 934, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.01076340675354, "incorrect_loss_raw": 1.5875958998998005, "correct_loss_per_char": 0.50538170337677, "incorrect_loss_per_char": 0.7937979499499003, "correct_loss_per_token": 1.01076340675354, "incorrect_loss_per_token": 1.5875958998998005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.01076340675354, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -1.01076340675354, "logits_per_char": -0.50538170337677, "num_chars": 2}, {"sum_logits": -1.3369927406311035, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.3369927406311035, "logits_per_char": -0.6684963703155518, "num_chars": 2}, {"sum_logits": -1.8077746629714966, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.8077746629714966, "logits_per_char": -0.9038873314857483, "num_chars": 2}, {"sum_logits": -1.6180202960968018, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.6180202960968018, "logits_per_char": -0.8090101480484009, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 935, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2245697975158691, "incorrect_loss_raw": 1.4729685386021931, "correct_loss_per_char": 0.6122848987579346, "incorrect_loss_per_char": 0.7364842693010966, "correct_loss_per_token": 1.2245697975158691, "incorrect_loss_per_token": 1.4729685386021931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2245697975158691, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -1.2245697975158691, "logits_per_char": -0.6122848987579346, "num_chars": 2}, {"sum_logits": -1.2590258121490479, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.2590258121490479, "logits_per_char": -0.6295129060745239, "num_chars": 2}, {"sum_logits": -1.6691484451293945, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6691484451293945, "logits_per_char": -0.8345742225646973, "num_chars": 2}, {"sum_logits": -1.4907313585281372, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.4907313585281372, "logits_per_char": -0.7453656792640686, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 936, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0155415534973145, "incorrect_loss_raw": 1.28440260887146, "correct_loss_per_char": 1.0077707767486572, "incorrect_loss_per_char": 0.64220130443573, "correct_loss_per_token": 2.0155415534973145, "incorrect_loss_per_token": 1.28440260887146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1074638366699219, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.1074638366699219, "logits_per_char": -0.5537319183349609, "num_chars": 2}, {"sum_logits": -1.0878665447235107, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -1.0878665447235107, "logits_per_char": -0.5439332723617554, "num_chars": 2}, {"sum_logits": -2.0155415534973145, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -2.0155415534973145, "logits_per_char": -1.0077707767486572, "num_chars": 2}, {"sum_logits": -1.6578774452209473, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.6578774452209473, "logits_per_char": -0.8289387226104736, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 937, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2759305238723755, "incorrect_loss_raw": 1.4535154898961384, "correct_loss_per_char": 0.6379652619361877, "incorrect_loss_per_char": 0.7267577449480692, "correct_loss_per_token": 1.2759305238723755, "incorrect_loss_per_token": 1.4535154898961384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2693363428115845, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -1.2693363428115845, "logits_per_char": -0.6346681714057922, "num_chars": 2}, {"sum_logits": -1.2759305238723755, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.2759305238723755, "logits_per_char": -0.6379652619361877, "num_chars": 2}, {"sum_logits": -1.7159383296966553, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.7159383296966553, "logits_per_char": -0.8579691648483276, "num_chars": 2}, {"sum_logits": -1.3752717971801758, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.3752717971801758, "logits_per_char": -0.6876358985900879, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 938, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8599302768707275, "incorrect_loss_raw": 1.2985778252283733, "correct_loss_per_char": 0.9299651384353638, "incorrect_loss_per_char": 0.6492889126141866, "correct_loss_per_token": 1.8599302768707275, "incorrect_loss_per_token": 1.2985778252283733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.045458197593689, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -1.045458197593689, "logits_per_char": -0.5227290987968445, "num_chars": 2}, {"sum_logits": -1.283900260925293, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.283900260925293, "logits_per_char": -0.6419501304626465, "num_chars": 2}, {"sum_logits": -1.8599302768707275, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.8599302768707275, "logits_per_char": -0.9299651384353638, "num_chars": 2}, {"sum_logits": -1.5663750171661377, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.5663750171661377, "logits_per_char": -0.7831875085830688, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 939, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9293591976165771, "incorrect_loss_raw": 1.2935491005579631, "correct_loss_per_char": 0.9646795988082886, "incorrect_loss_per_char": 0.6467745502789816, "correct_loss_per_token": 1.9293591976165771, "incorrect_loss_per_token": 1.2935491005579631, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0194138288497925, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -1.0194138288497925, "logits_per_char": -0.5097069144248962, "num_chars": 2}, {"sum_logits": -1.252922773361206, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.252922773361206, "logits_per_char": -0.626461386680603, "num_chars": 2}, {"sum_logits": -1.9293591976165771, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.9293591976165771, "logits_per_char": -0.9646795988082886, "num_chars": 2}, {"sum_logits": -1.6083106994628906, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.6083106994628906, "logits_per_char": -0.8041553497314453, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 940, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2700188159942627, "incorrect_loss_raw": 1.459716002146403, "correct_loss_per_char": 0.6350094079971313, "incorrect_loss_per_char": 0.7298580010732015, "correct_loss_per_token": 1.2700188159942627, "incorrect_loss_per_token": 1.459716002146403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.235254168510437, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -1.235254168510437, "logits_per_char": -0.6176270842552185, "num_chars": 2}, {"sum_logits": -1.2700188159942627, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.2700188159942627, "logits_per_char": -0.6350094079971313, "num_chars": 2}, {"sum_logits": -1.7224786281585693, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.7224786281585693, "logits_per_char": -0.8612393140792847, "num_chars": 2}, {"sum_logits": -1.4214152097702026, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.4214152097702026, "logits_per_char": -0.7107076048851013, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 941, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1986536979675293, "incorrect_loss_raw": 1.276712695757548, "correct_loss_per_char": 1.0993268489837646, "incorrect_loss_per_char": 0.638356347878774, "correct_loss_per_token": 2.1986536979675293, "incorrect_loss_per_token": 1.276712695757548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.938970685005188, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -0.938970685005188, "logits_per_char": -0.469485342502594, "num_chars": 2}, {"sum_logits": -1.1806917190551758, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.1806917190551758, "logits_per_char": -0.5903458595275879, "num_chars": 2}, {"sum_logits": -2.1986536979675293, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -2.1986536979675293, "logits_per_char": -1.0993268489837646, "num_chars": 2}, {"sum_logits": -1.7104756832122803, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.7104756832122803, "logits_per_char": -0.8552378416061401, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 942, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.312374234199524, "incorrect_loss_raw": 1.4506349960962932, "correct_loss_per_char": 0.656187117099762, "incorrect_loss_per_char": 0.7253174980481466, "correct_loss_per_token": 1.312374234199524, "incorrect_loss_per_token": 1.4506349960962932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1850028038024902, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -1.1850028038024902, "logits_per_char": -0.5925014019012451, "num_chars": 2}, {"sum_logits": -1.394099235534668, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.394099235534668, "logits_per_char": -0.697049617767334, "num_chars": 2}, {"sum_logits": -1.7728029489517212, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7728029489517212, "logits_per_char": -0.8864014744758606, "num_chars": 2}, {"sum_logits": -1.312374234199524, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.312374234199524, "logits_per_char": -0.656187117099762, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 943, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8069193363189697, "incorrect_loss_raw": 1.310786286989848, "correct_loss_per_char": 0.9034596681594849, "incorrect_loss_per_char": 0.655393143494924, "correct_loss_per_token": 1.8069193363189697, "incorrect_loss_per_token": 1.310786286989848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.183039665222168, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.183039665222168, "logits_per_char": -0.591519832611084, "num_chars": 2}, {"sum_logits": -1.1699479818344116, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -1.1699479818344116, "logits_per_char": -0.5849739909172058, "num_chars": 2}, {"sum_logits": -1.8069193363189697, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.8069193363189697, "logits_per_char": -0.9034596681594849, "num_chars": 2}, {"sum_logits": -1.5793712139129639, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.5793712139129639, "logits_per_char": -0.7896856069564819, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 944, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.219551920890808, "incorrect_loss_raw": 1.4676978190739949, "correct_loss_per_char": 0.609775960445404, "incorrect_loss_per_char": 0.7338489095369974, "correct_loss_per_token": 1.219551920890808, "incorrect_loss_per_token": 1.4676978190739949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.219551920890808, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -1.219551920890808, "logits_per_char": -0.609775960445404, "num_chars": 2}, {"sum_logits": -1.410475254058838, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.410475254058838, "logits_per_char": -0.705237627029419, "num_chars": 2}, {"sum_logits": -1.646478533744812, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.646478533744812, "logits_per_char": -0.823239266872406, "num_chars": 2}, {"sum_logits": -1.346139669418335, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.346139669418335, "logits_per_char": -0.6730698347091675, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 945, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.678774356842041, "incorrect_loss_raw": 1.3657035032908122, "correct_loss_per_char": 0.8393871784210205, "incorrect_loss_per_char": 0.6828517516454061, "correct_loss_per_token": 1.678774356842041, "incorrect_loss_per_token": 1.3657035032908122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.034773588180542, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -1.034773588180542, "logits_per_char": -0.517386794090271, "num_chars": 2}, {"sum_logits": -1.276695728302002, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.276695728302002, "logits_per_char": -0.638347864151001, "num_chars": 2}, {"sum_logits": -1.7856411933898926, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.7856411933898926, "logits_per_char": -0.8928205966949463, "num_chars": 2}, {"sum_logits": -1.678774356842041, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.678774356842041, "logits_per_char": -0.8393871784210205, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 946, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9383633136749268, "incorrect_loss_raw": 1.293412168820699, "correct_loss_per_char": 0.9691816568374634, "incorrect_loss_per_char": 0.6467060844103495, "correct_loss_per_token": 1.9383633136749268, "incorrect_loss_per_token": 1.293412168820699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.062748908996582, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -1.062748908996582, "logits_per_char": -0.531374454498291, "num_chars": 2}, {"sum_logits": -1.1755146980285645, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.1755146980285645, "logits_per_char": -0.5877573490142822, "num_chars": 2}, {"sum_logits": -1.9383633136749268, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.9383633136749268, "logits_per_char": -0.9691816568374634, "num_chars": 2}, {"sum_logits": -1.6419728994369507, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.6419728994369507, "logits_per_char": -0.8209864497184753, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 947, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.10176682472229, "incorrect_loss_raw": 1.5911576747894287, "correct_loss_per_char": 0.550883412361145, "incorrect_loss_per_char": 0.7955788373947144, "correct_loss_per_token": 1.10176682472229, "incorrect_loss_per_token": 1.5911576747894287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0558838844299316, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -1.0558838844299316, "logits_per_char": -0.5279419422149658, "num_chars": 2}, {"sum_logits": -1.10176682472229, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.10176682472229, "logits_per_char": -0.550883412361145, "num_chars": 2}, {"sum_logits": -1.8447202444076538, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.8447202444076538, "logits_per_char": -0.9223601222038269, "num_chars": 2}, {"sum_logits": -1.8728688955307007, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.8728688955307007, "logits_per_char": -0.9364344477653503, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 948, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0893325805664062, "incorrect_loss_raw": 1.5306939284006755, "correct_loss_per_char": 0.5446662902832031, "incorrect_loss_per_char": 0.7653469642003378, "correct_loss_per_token": 1.0893325805664062, "incorrect_loss_per_token": 1.5306939284006755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0893325805664062, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -1.0893325805664062, "logits_per_char": -0.5446662902832031, "num_chars": 2}, {"sum_logits": -1.4089909791946411, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.4089909791946411, "logits_per_char": -0.7044954895973206, "num_chars": 2}, {"sum_logits": -1.7397633790969849, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.7397633790969849, "logits_per_char": -0.8698816895484924, "num_chars": 2}, {"sum_logits": -1.4433274269104004, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.4433274269104004, "logits_per_char": -0.7216637134552002, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 949, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5822566747665405, "incorrect_loss_raw": 1.3417328198750813, "correct_loss_per_char": 0.7911283373832703, "incorrect_loss_per_char": 0.6708664099375407, "correct_loss_per_token": 1.5822566747665405, "incorrect_loss_per_token": 1.3417328198750813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2583425045013428, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.2583425045013428, "logits_per_char": -0.6291712522506714, "num_chars": 2}, {"sum_logits": -1.2938072681427002, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.2938072681427002, "logits_per_char": -0.6469036340713501, "num_chars": 2}, {"sum_logits": -1.5822566747665405, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.5822566747665405, "logits_per_char": -0.7911283373832703, "num_chars": 2}, {"sum_logits": -1.4730486869812012, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4730486869812012, "logits_per_char": -0.7365243434906006, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 950, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1555057764053345, "incorrect_loss_raw": 1.5663768847783406, "correct_loss_per_char": 0.5777528882026672, "incorrect_loss_per_char": 0.7831884423891703, "correct_loss_per_token": 1.1555057764053345, "incorrect_loss_per_token": 1.5663768847783406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0632997751235962, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -1.0632997751235962, "logits_per_char": -0.5316498875617981, "num_chars": 2}, {"sum_logits": -1.1555057764053345, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.1555057764053345, "logits_per_char": -0.5777528882026672, "num_chars": 2}, {"sum_logits": -2.031561851501465, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -2.031561851501465, "logits_per_char": -1.0157809257507324, "num_chars": 2}, {"sum_logits": -1.604269027709961, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.604269027709961, "logits_per_char": -0.8021345138549805, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 951, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7786235809326172, "incorrect_loss_raw": 1.3288873036702473, "correct_loss_per_char": 0.8893117904663086, "incorrect_loss_per_char": 0.6644436518351237, "correct_loss_per_token": 1.7786235809326172, "incorrect_loss_per_token": 1.3288873036702473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0490562915802002, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -1.0490562915802002, "logits_per_char": -0.5245281457901001, "num_chars": 2}, {"sum_logits": -1.2528762817382812, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.2528762817382812, "logits_per_char": -0.6264381408691406, "num_chars": 2}, {"sum_logits": -1.7786235809326172, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7786235809326172, "logits_per_char": -0.8893117904663086, "num_chars": 2}, {"sum_logits": -1.6847293376922607, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.6847293376922607, "logits_per_char": -0.8423646688461304, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 952, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1600518226623535, "incorrect_loss_raw": 1.5342101256052654, "correct_loss_per_char": 0.5800259113311768, "incorrect_loss_per_char": 0.7671050628026327, "correct_loss_per_token": 1.1600518226623535, "incorrect_loss_per_token": 1.5342101256052654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1600518226623535, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.1600518226623535, "logits_per_char": -0.5800259113311768, "num_chars": 2}, {"sum_logits": -1.2172621488571167, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.2172621488571167, "logits_per_char": -0.6086310744285583, "num_chars": 2}, {"sum_logits": -1.9688609838485718, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9688609838485718, "logits_per_char": -0.9844304919242859, "num_chars": 2}, {"sum_logits": -1.4165072441101074, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.4165072441101074, "logits_per_char": -0.7082536220550537, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 953, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1966012716293335, "incorrect_loss_raw": 1.514292558034261, "correct_loss_per_char": 0.5983006358146667, "incorrect_loss_per_char": 0.7571462790171305, "correct_loss_per_token": 1.1966012716293335, "incorrect_loss_per_token": 1.514292558034261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1966012716293335, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": true, "logits_per_token": -1.1966012716293335, "logits_per_char": -0.5983006358146667, "num_chars": 2}, {"sum_logits": -1.2838448286056519, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.2838448286056519, "logits_per_char": -0.6419224143028259, "num_chars": 2}, {"sum_logits": -1.955488920211792, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.955488920211792, "logits_per_char": -0.977744460105896, "num_chars": 2}, {"sum_logits": -1.3035439252853394, "num_tokens": 1, "num_tokens_all": 1076, "is_greedy": false, "logits_per_token": -1.3035439252853394, "logits_per_char": -0.6517719626426697, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 954, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3052401542663574, "incorrect_loss_raw": 1.44113290309906, "correct_loss_per_char": 0.6526200771331787, "incorrect_loss_per_char": 0.72056645154953, "correct_loss_per_token": 1.3052401542663574, "incorrect_loss_per_token": 1.44113290309906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2167857885360718, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -1.2167857885360718, "logits_per_char": -0.6083928942680359, "num_chars": 2}, {"sum_logits": -1.3052401542663574, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3052401542663574, "logits_per_char": -0.6526200771331787, "num_chars": 2}, {"sum_logits": -1.6423088312149048, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6423088312149048, "logits_per_char": -0.8211544156074524, "num_chars": 2}, {"sum_logits": -1.4643040895462036, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.4643040895462036, "logits_per_char": -0.7321520447731018, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 955, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6734765768051147, "incorrect_loss_raw": 1.3330168724060059, "correct_loss_per_char": 0.8367382884025574, "incorrect_loss_per_char": 0.6665084362030029, "correct_loss_per_token": 1.6734765768051147, "incorrect_loss_per_token": 1.3330168724060059, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0777608156204224, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -1.0777608156204224, "logits_per_char": -0.5388804078102112, "num_chars": 2}, {"sum_logits": -1.4210494756698608, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.4210494756698608, "logits_per_char": -0.7105247378349304, "num_chars": 2}, {"sum_logits": -1.6734765768051147, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.6734765768051147, "logits_per_char": -0.8367382884025574, "num_chars": 2}, {"sum_logits": -1.5002403259277344, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5002403259277344, "logits_per_char": -0.7501201629638672, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 956, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.163399338722229, "incorrect_loss_raw": 1.542158842086792, "correct_loss_per_char": 0.5816996693611145, "incorrect_loss_per_char": 0.771079421043396, "correct_loss_per_token": 1.163399338722229, "incorrect_loss_per_token": 1.542158842086792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1007345914840698, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.1007345914840698, "logits_per_char": -0.5503672957420349, "num_chars": 2}, {"sum_logits": -1.163399338722229, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.163399338722229, "logits_per_char": -0.5816996693611145, "num_chars": 2}, {"sum_logits": -1.8932017087936401, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.8932017087936401, "logits_per_char": -0.9466008543968201, "num_chars": 2}, {"sum_logits": -1.632540225982666, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.632540225982666, "logits_per_char": -0.816270112991333, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 957, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1189486980438232, "incorrect_loss_raw": 1.5605664650599163, "correct_loss_per_char": 0.5594743490219116, "incorrect_loss_per_char": 0.7802832325299581, "correct_loss_per_token": 1.1189486980438232, "incorrect_loss_per_token": 1.5605664650599163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.148114800453186, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.148114800453186, "logits_per_char": -0.574057400226593, "num_chars": 2}, {"sum_logits": -1.1189486980438232, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -1.1189486980438232, "logits_per_char": -0.5594743490219116, "num_chars": 2}, {"sum_logits": -1.9524825811386108, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9524825811386108, "logits_per_char": -0.9762412905693054, "num_chars": 2}, {"sum_logits": -1.5811020135879517, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.5811020135879517, "logits_per_char": -0.7905510067939758, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 958, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0861376523971558, "incorrect_loss_raw": 1.571916103363037, "correct_loss_per_char": 0.5430688261985779, "incorrect_loss_per_char": 0.7859580516815186, "correct_loss_per_token": 1.0861376523971558, "incorrect_loss_per_token": 1.571916103363037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0861376523971558, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -1.0861376523971558, "logits_per_char": -0.5430688261985779, "num_chars": 2}, {"sum_logits": -1.183954119682312, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.183954119682312, "logits_per_char": -0.591977059841156, "num_chars": 2}, {"sum_logits": -1.9395394325256348, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.9395394325256348, "logits_per_char": -0.9697697162628174, "num_chars": 2}, {"sum_logits": -1.5922547578811646, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.5922547578811646, "logits_per_char": -0.7961273789405823, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 959, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6953911781311035, "incorrect_loss_raw": 1.344200591246287, "correct_loss_per_char": 0.8476955890655518, "incorrect_loss_per_char": 0.6721002956231436, "correct_loss_per_token": 1.6953911781311035, "incorrect_loss_per_token": 1.344200591246287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9993273615837097, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.9993273615837097, "logits_per_char": -0.49966368079185486, "num_chars": 2}, {"sum_logits": -1.4361345767974854, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.4361345767974854, "logits_per_char": -0.7180672883987427, "num_chars": 2}, {"sum_logits": -1.6953911781311035, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6953911781311035, "logits_per_char": -0.8476955890655518, "num_chars": 2}, {"sum_logits": -1.597139835357666, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.597139835357666, "logits_per_char": -0.798569917678833, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 960, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.682743787765503, "incorrect_loss_raw": 1.3978453477223713, "correct_loss_per_char": 0.8413718938827515, "incorrect_loss_per_char": 0.6989226738611857, "correct_loss_per_token": 1.682743787765503, "incorrect_loss_per_token": 1.3978453477223713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0681556463241577, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -1.0681556463241577, "logits_per_char": -0.5340778231620789, "num_chars": 2}, {"sum_logits": -1.126561164855957, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.126561164855957, "logits_per_char": -0.5632805824279785, "num_chars": 2}, {"sum_logits": -1.9988192319869995, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.9988192319869995, "logits_per_char": -0.9994096159934998, "num_chars": 2}, {"sum_logits": -1.682743787765503, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.682743787765503, "logits_per_char": -0.8413718938827515, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 961, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1994528770446777, "incorrect_loss_raw": 1.5562711556752522, "correct_loss_per_char": 0.5997264385223389, "incorrect_loss_per_char": 0.7781355778376261, "correct_loss_per_token": 1.1994528770446777, "incorrect_loss_per_token": 1.5562711556752522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0166456699371338, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": true, "logits_per_token": -1.0166456699371338, "logits_per_char": -0.5083228349685669, "num_chars": 2}, {"sum_logits": -1.1994528770446777, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.1994528770446777, "logits_per_char": -0.5997264385223389, "num_chars": 2}, {"sum_logits": -2.018648862838745, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -2.018648862838745, "logits_per_char": -1.0093244314193726, "num_chars": 2}, {"sum_logits": -1.633518934249878, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.633518934249878, "logits_per_char": -0.816759467124939, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 962, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7789028882980347, "incorrect_loss_raw": 1.320198655128479, "correct_loss_per_char": 0.8894514441490173, "incorrect_loss_per_char": 0.6600993275642395, "correct_loss_per_token": 1.7789028882980347, "incorrect_loss_per_token": 1.320198655128479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1408076286315918, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -1.1408076286315918, "logits_per_char": -0.5704038143157959, "num_chars": 2}, {"sum_logits": -1.1662628650665283, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.1662628650665283, "logits_per_char": -0.5831314325332642, "num_chars": 2}, {"sum_logits": -1.7789028882980347, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.7789028882980347, "logits_per_char": -0.8894514441490173, "num_chars": 2}, {"sum_logits": -1.653525471687317, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.653525471687317, "logits_per_char": -0.8267627358436584, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 963, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1323837041854858, "incorrect_loss_raw": 1.5198884805043538, "correct_loss_per_char": 0.5661918520927429, "incorrect_loss_per_char": 0.7599442402521769, "correct_loss_per_token": 1.1323837041854858, "incorrect_loss_per_token": 1.5198884805043538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1323837041854858, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -1.1323837041854858, "logits_per_char": -0.5661918520927429, "num_chars": 2}, {"sum_logits": -1.2509746551513672, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.2509746551513672, "logits_per_char": -0.6254873275756836, "num_chars": 2}, {"sum_logits": -1.6783076524734497, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.6783076524734497, "logits_per_char": -0.8391538262367249, "num_chars": 2}, {"sum_logits": -1.6303831338882446, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.6303831338882446, "logits_per_char": -0.8151915669441223, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 964, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7372181415557861, "incorrect_loss_raw": 1.3626103003819783, "correct_loss_per_char": 0.8686090707778931, "incorrect_loss_per_char": 0.6813051501909891, "correct_loss_per_token": 1.7372181415557861, "incorrect_loss_per_token": 1.3626103003819783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.001705527305603, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": true, "logits_per_token": -1.001705527305603, "logits_per_char": -0.5008527636528015, "num_chars": 2}, {"sum_logits": -1.2555711269378662, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.2555711269378662, "logits_per_char": -0.6277855634689331, "num_chars": 2}, {"sum_logits": -1.8305542469024658, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.8305542469024658, "logits_per_char": -0.9152771234512329, "num_chars": 2}, {"sum_logits": -1.7372181415557861, "num_tokens": 1, "num_tokens_all": 1061, "is_greedy": false, "logits_per_token": -1.7372181415557861, "logits_per_char": -0.8686090707778931, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 965, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8826290369033813, "incorrect_loss_raw": 1.3123647371927898, "correct_loss_per_char": 0.9413145184516907, "incorrect_loss_per_char": 0.6561823685963949, "correct_loss_per_token": 1.8826290369033813, "incorrect_loss_per_token": 1.3123647371927898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0298575162887573, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -1.0298575162887573, "logits_per_char": -0.5149287581443787, "num_chars": 2}, {"sum_logits": -1.206394910812378, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.206394910812378, "logits_per_char": -0.603197455406189, "num_chars": 2}, {"sum_logits": -1.8826290369033813, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.8826290369033813, "logits_per_char": -0.9413145184516907, "num_chars": 2}, {"sum_logits": -1.7008417844772339, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.7008417844772339, "logits_per_char": -0.8504208922386169, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 966, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9688540697097778, "incorrect_loss_raw": 1.663530667622884, "correct_loss_per_char": 0.4844270348548889, "incorrect_loss_per_char": 0.831765333811442, "correct_loss_per_token": 0.9688540697097778, "incorrect_loss_per_token": 1.663530667622884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9688540697097778, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.9688540697097778, "logits_per_char": -0.4844270348548889, "num_chars": 2}, {"sum_logits": -1.158632516860962, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.158632516860962, "logits_per_char": -0.579316258430481, "num_chars": 2}, {"sum_logits": -2.048867702484131, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -2.048867702484131, "logits_per_char": -1.0244338512420654, "num_chars": 2}, {"sum_logits": -1.7830917835235596, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.7830917835235596, "logits_per_char": -0.8915458917617798, "num_chars": 2}], "label": 0, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 967, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3628792762756348, "incorrect_loss_raw": 1.4228593111038208, "correct_loss_per_char": 0.6814396381378174, "incorrect_loss_per_char": 0.7114296555519104, "correct_loss_per_token": 1.3628792762756348, "incorrect_loss_per_token": 1.4228593111038208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1911081075668335, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -1.1911081075668335, "logits_per_char": -0.5955540537834167, "num_chars": 2}, {"sum_logits": -1.3628792762756348, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.3628792762756348, "logits_per_char": -0.6814396381378174, "num_chars": 2}, {"sum_logits": -1.677612543106079, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.677612543106079, "logits_per_char": -0.8388062715530396, "num_chars": 2}, {"sum_logits": -1.3998572826385498, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.3998572826385498, "logits_per_char": -0.6999286413192749, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 968, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.455262541770935, "incorrect_loss_raw": 1.4055004914601643, "correct_loss_per_char": 0.7276312708854675, "incorrect_loss_per_char": 0.7027502457300822, "correct_loss_per_token": 1.455262541770935, "incorrect_loss_per_token": 1.4055004914601643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.104570746421814, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.104570746421814, "logits_per_char": -0.552285373210907, "num_chars": 2}, {"sum_logits": -1.455262541770935, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.455262541770935, "logits_per_char": -0.7276312708854675, "num_chars": 2}, {"sum_logits": -1.7305679321289062, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.7305679321289062, "logits_per_char": -0.8652839660644531, "num_chars": 2}, {"sum_logits": -1.381362795829773, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.381362795829773, "logits_per_char": -0.6906813979148865, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 969, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8031812906265259, "incorrect_loss_raw": 1.3211056391398113, "correct_loss_per_char": 0.9015906453132629, "incorrect_loss_per_char": 0.6605528195699056, "correct_loss_per_token": 1.8031812906265259, "incorrect_loss_per_token": 1.3211056391398113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0939639806747437, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -1.0939639806747437, "logits_per_char": -0.5469819903373718, "num_chars": 2}, {"sum_logits": -1.199784517288208, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.199784517288208, "logits_per_char": -0.599892258644104, "num_chars": 2}, {"sum_logits": -1.8031812906265259, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.8031812906265259, "logits_per_char": -0.9015906453132629, "num_chars": 2}, {"sum_logits": -1.669568419456482, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.669568419456482, "logits_per_char": -0.834784209728241, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 970, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.122931718826294, "incorrect_loss_raw": 1.5626731316248577, "correct_loss_per_char": 0.561465859413147, "incorrect_loss_per_char": 0.7813365658124288, "correct_loss_per_token": 1.122931718826294, "incorrect_loss_per_token": 1.5626731316248577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.119741439819336, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -1.119741439819336, "logits_per_char": -0.559870719909668, "num_chars": 2}, {"sum_logits": -1.122931718826294, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.122931718826294, "logits_per_char": -0.561465859413147, "num_chars": 2}, {"sum_logits": -1.8624123334884644, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.8624123334884644, "logits_per_char": -0.9312061667442322, "num_chars": 2}, {"sum_logits": -1.7058656215667725, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.7058656215667725, "logits_per_char": -0.8529328107833862, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 971, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9525842666625977, "incorrect_loss_raw": 1.321912129720052, "correct_loss_per_char": 0.9762921333312988, "incorrect_loss_per_char": 0.660956064860026, "correct_loss_per_token": 1.9525842666625977, "incorrect_loss_per_token": 1.321912129720052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.967268705368042, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -0.967268705368042, "logits_per_char": -0.483634352684021, "num_chars": 2}, {"sum_logits": -1.2011616230010986, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.2011616230010986, "logits_per_char": -0.6005808115005493, "num_chars": 2}, {"sum_logits": -1.9525842666625977, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.9525842666625977, "logits_per_char": -0.9762921333312988, "num_chars": 2}, {"sum_logits": -1.7973060607910156, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.7973060607910156, "logits_per_char": -0.8986530303955078, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 972, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3165227174758911, "incorrect_loss_raw": 1.4517483313878377, "correct_loss_per_char": 0.6582613587379456, "incorrect_loss_per_char": 0.7258741656939188, "correct_loss_per_token": 1.3165227174758911, "incorrect_loss_per_token": 1.4517483313878377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1368954181671143, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -1.1368954181671143, "logits_per_char": -0.5684477090835571, "num_chars": 2}, {"sum_logits": -1.3165227174758911, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.3165227174758911, "logits_per_char": -0.6582613587379456, "num_chars": 2}, {"sum_logits": -1.7175225019454956, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.7175225019454956, "logits_per_char": -0.8587612509727478, "num_chars": 2}, {"sum_logits": -1.5008270740509033, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.5008270740509033, "logits_per_char": -0.7504135370254517, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 973, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2502630949020386, "incorrect_loss_raw": 1.6040101051330566, "correct_loss_per_char": 0.6251315474510193, "incorrect_loss_per_char": 0.8020050525665283, "correct_loss_per_token": 1.2502630949020386, "incorrect_loss_per_token": 1.6040101051330566, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8513574600219727, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.8513574600219727, "logits_per_char": -0.42567873001098633, "num_chars": 2}, {"sum_logits": -1.2502630949020386, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.2502630949020386, "logits_per_char": -0.6251315474510193, "num_chars": 2}, {"sum_logits": -2.0820577144622803, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -2.0820577144622803, "logits_per_char": -1.0410288572311401, "num_chars": 2}, {"sum_logits": -1.878615140914917, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.878615140914917, "logits_per_char": -0.9393075704574585, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 974, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8499032258987427, "incorrect_loss_raw": 1.295046091079712, "correct_loss_per_char": 0.9249516129493713, "incorrect_loss_per_char": 0.647523045539856, "correct_loss_per_token": 1.8499032258987427, "incorrect_loss_per_token": 1.295046091079712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1343339681625366, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -1.1343339681625366, "logits_per_char": -0.5671669840812683, "num_chars": 2}, {"sum_logits": -1.3088796138763428, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.3088796138763428, "logits_per_char": -0.6544398069381714, "num_chars": 2}, {"sum_logits": -1.8499032258987427, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.8499032258987427, "logits_per_char": -0.9249516129493713, "num_chars": 2}, {"sum_logits": -1.4419246912002563, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.4419246912002563, "logits_per_char": -0.7209623456001282, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 975, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6579445600509644, "incorrect_loss_raw": 1.3629088401794434, "correct_loss_per_char": 0.8289722800254822, "incorrect_loss_per_char": 0.6814544200897217, "correct_loss_per_token": 1.6579445600509644, "incorrect_loss_per_token": 1.3629088401794434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0815988779067993, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -1.0815988779067993, "logits_per_char": -0.5407994389533997, "num_chars": 2}, {"sum_logits": -1.2287517786026, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.2287517786026, "logits_per_char": -0.6143758893013, "num_chars": 2}, {"sum_logits": -1.7783758640289307, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.7783758640289307, "logits_per_char": -0.8891879320144653, "num_chars": 2}, {"sum_logits": -1.6579445600509644, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.6579445600509644, "logits_per_char": -0.8289722800254822, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 976, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1584901809692383, "incorrect_loss_raw": 1.652218182881673, "correct_loss_per_char": 0.5792450904846191, "incorrect_loss_per_char": 0.8261090914408366, "correct_loss_per_token": 1.1584901809692383, "incorrect_loss_per_token": 1.652218182881673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8925752639770508, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -0.8925752639770508, "logits_per_char": -0.4462876319885254, "num_chars": 2}, {"sum_logits": -1.1584901809692383, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.1584901809692383, "logits_per_char": -0.5792450904846191, "num_chars": 2}, {"sum_logits": -2.2595674991607666, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -2.2595674991607666, "logits_per_char": -1.1297837495803833, "num_chars": 2}, {"sum_logits": -1.8045117855072021, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.8045117855072021, "logits_per_char": -0.9022558927536011, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 977, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6549304723739624, "incorrect_loss_raw": 1.3674356540044148, "correct_loss_per_char": 0.8274652361869812, "incorrect_loss_per_char": 0.6837178270022074, "correct_loss_per_token": 1.6549304723739624, "incorrect_loss_per_token": 1.3674356540044148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0725526809692383, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.0725526809692383, "logits_per_char": -0.5362763404846191, "num_chars": 2}, {"sum_logits": -1.2330821752548218, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2330821752548218, "logits_per_char": -0.6165410876274109, "num_chars": 2}, {"sum_logits": -1.7966721057891846, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.7966721057891846, "logits_per_char": -0.8983360528945923, "num_chars": 2}, {"sum_logits": -1.6549304723739624, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.6549304723739624, "logits_per_char": -0.8274652361869812, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 978, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4754832983016968, "incorrect_loss_raw": 1.4028895298639934, "correct_loss_per_char": 0.7377416491508484, "incorrect_loss_per_char": 0.7014447649319967, "correct_loss_per_token": 1.4754832983016968, "incorrect_loss_per_token": 1.4028895298639934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1539767980575562, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": true, "logits_per_token": -1.1539767980575562, "logits_per_char": -0.5769883990287781, "num_chars": 2}, {"sum_logits": -1.2702091932296753, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.2702091932296753, "logits_per_char": -0.6351045966148376, "num_chars": 2}, {"sum_logits": -1.7844825983047485, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.7844825983047485, "logits_per_char": -0.8922412991523743, "num_chars": 2}, {"sum_logits": -1.4754832983016968, "num_tokens": 1, "num_tokens_all": 1084, "is_greedy": false, "logits_per_token": -1.4754832983016968, "logits_per_char": -0.7377416491508484, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 979, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.257204532623291, "incorrect_loss_raw": 1.530713180700938, "correct_loss_per_char": 0.6286022663116455, "incorrect_loss_per_char": 0.765356590350469, "correct_loss_per_token": 1.257204532623291, "incorrect_loss_per_token": 1.530713180700938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9766035676002502, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.9766035676002502, "logits_per_char": -0.4883017838001251, "num_chars": 2}, {"sum_logits": -1.257204532623291, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.257204532623291, "logits_per_char": -0.6286022663116455, "num_chars": 2}, {"sum_logits": -1.8946418762207031, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.8946418762207031, "logits_per_char": -0.9473209381103516, "num_chars": 2}, {"sum_logits": -1.7208940982818604, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.7208940982818604, "logits_per_char": -0.8604470491409302, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 980, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1949158906936646, "incorrect_loss_raw": 1.5346466700236003, "correct_loss_per_char": 0.5974579453468323, "incorrect_loss_per_char": 0.7673233350118002, "correct_loss_per_token": 1.1949158906936646, "incorrect_loss_per_token": 1.5346466700236003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1096508502960205, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": true, "logits_per_token": -1.1096508502960205, "logits_per_char": -0.5548254251480103, "num_chars": 2}, {"sum_logits": -1.1949158906936646, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.1949158906936646, "logits_per_char": -0.5974579453468323, "num_chars": 2}, {"sum_logits": -1.9133727550506592, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.9133727550506592, "logits_per_char": -0.9566863775253296, "num_chars": 2}, {"sum_logits": -1.580916404724121, "num_tokens": 1, "num_tokens_all": 1088, "is_greedy": false, "logits_per_token": -1.580916404724121, "logits_per_char": -0.7904582023620605, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 981, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5917015075683594, "incorrect_loss_raw": 1.3910481135050456, "correct_loss_per_char": 0.7958507537841797, "incorrect_loss_per_char": 0.6955240567525228, "correct_loss_per_token": 1.5917015075683594, "incorrect_loss_per_token": 1.3910481135050456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0679597854614258, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -1.0679597854614258, "logits_per_char": -0.5339798927307129, "num_chars": 2}, {"sum_logits": -1.240756630897522, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.240756630897522, "logits_per_char": -0.620378315448761, "num_chars": 2}, {"sum_logits": -1.864427924156189, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.864427924156189, "logits_per_char": -0.9322139620780945, "num_chars": 2}, {"sum_logits": -1.5917015075683594, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.5917015075683594, "logits_per_char": -0.7958507537841797, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 982, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8224565982818604, "incorrect_loss_raw": 1.390745222568512, "correct_loss_per_char": 0.9112282991409302, "incorrect_loss_per_char": 0.695372611284256, "correct_loss_per_token": 1.8224565982818604, "incorrect_loss_per_token": 1.390745222568512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9659052491188049, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.9659052491188049, "logits_per_char": -0.48295262455940247, "num_chars": 2}, {"sum_logits": -1.1382683515548706, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.1382683515548706, "logits_per_char": -0.5691341757774353, "num_chars": 2}, {"sum_logits": -2.0680620670318604, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -2.0680620670318604, "logits_per_char": -1.0340310335159302, "num_chars": 2}, {"sum_logits": -1.8224565982818604, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.8224565982818604, "logits_per_char": -0.9112282991409302, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 983, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4706470966339111, "incorrect_loss_raw": 1.4101770718892415, "correct_loss_per_char": 0.7353235483169556, "incorrect_loss_per_char": 0.7050885359446207, "correct_loss_per_token": 1.4706470966339111, "incorrect_loss_per_token": 1.4101770718892415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1439533233642578, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -1.1439533233642578, "logits_per_char": -0.5719766616821289, "num_chars": 2}, {"sum_logits": -1.4706470966339111, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.4706470966339111, "logits_per_char": -0.7353235483169556, "num_chars": 2}, {"sum_logits": -1.7419824600219727, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.7419824600219727, "logits_per_char": -0.8709912300109863, "num_chars": 2}, {"sum_logits": -1.3445954322814941, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.3445954322814941, "logits_per_char": -0.6722977161407471, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 984, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2724281549453735, "incorrect_loss_raw": 1.483147382736206, "correct_loss_per_char": 0.6362140774726868, "incorrect_loss_per_char": 0.741573691368103, "correct_loss_per_token": 1.2724281549453735, "incorrect_loss_per_token": 1.483147382736206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.094998836517334, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -1.094998836517334, "logits_per_char": -0.547499418258667, "num_chars": 2}, {"sum_logits": -1.2724281549453735, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.2724281549453735, "logits_per_char": -0.6362140774726868, "num_chars": 2}, {"sum_logits": -1.7978847026824951, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.7978847026824951, "logits_per_char": -0.8989423513412476, "num_chars": 2}, {"sum_logits": -1.556558609008789, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.556558609008789, "logits_per_char": -0.7782793045043945, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 985, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3130658864974976, "incorrect_loss_raw": 1.5208417177200317, "correct_loss_per_char": 0.6565329432487488, "incorrect_loss_per_char": 0.7604208588600159, "correct_loss_per_token": 1.3130658864974976, "incorrect_loss_per_token": 1.5208417177200317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9419143199920654, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.9419143199920654, "logits_per_char": -0.4709571599960327, "num_chars": 2}, {"sum_logits": -1.3130658864974976, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.3130658864974976, "logits_per_char": -0.6565329432487488, "num_chars": 2}, {"sum_logits": -1.9783462285995483, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.9783462285995483, "logits_per_char": -0.9891731142997742, "num_chars": 2}, {"sum_logits": -1.6422646045684814, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.6422646045684814, "logits_per_char": -0.8211323022842407, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 986, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.40738046169281, "incorrect_loss_raw": 1.4089080890019734, "correct_loss_per_char": 0.703690230846405, "incorrect_loss_per_char": 0.7044540445009867, "correct_loss_per_token": 1.40738046169281, "incorrect_loss_per_token": 1.4089080890019734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.144050121307373, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.144050121307373, "logits_per_char": -0.5720250606536865, "num_chars": 2}, {"sum_logits": -1.40738046169281, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.40738046169281, "logits_per_char": -0.703690230846405, "num_chars": 2}, {"sum_logits": -1.637062907218933, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.637062907218933, "logits_per_char": -0.8185314536094666, "num_chars": 2}, {"sum_logits": -1.4456112384796143, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4456112384796143, "logits_per_char": -0.7228056192398071, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 987, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2367100715637207, "incorrect_loss_raw": 1.5174763202667236, "correct_loss_per_char": 0.6183550357818604, "incorrect_loss_per_char": 0.7587381601333618, "correct_loss_per_token": 1.2367100715637207, "incorrect_loss_per_token": 1.5174763202667236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0447458028793335, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -1.0447458028793335, "logits_per_char": -0.5223729014396667, "num_chars": 2}, {"sum_logits": -1.2367100715637207, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.2367100715637207, "logits_per_char": -0.6183550357818604, "num_chars": 2}, {"sum_logits": -1.850970983505249, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.850970983505249, "logits_per_char": -0.9254854917526245, "num_chars": 2}, {"sum_logits": -1.6567121744155884, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.6567121744155884, "logits_per_char": -0.8283560872077942, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 988, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4702893495559692, "incorrect_loss_raw": 1.4061862627665203, "correct_loss_per_char": 0.7351446747779846, "incorrect_loss_per_char": 0.7030931313832601, "correct_loss_per_token": 1.4702893495559692, "incorrect_loss_per_token": 1.4061862627665203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1963117122650146, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -1.1963117122650146, "logits_per_char": -0.5981558561325073, "num_chars": 2}, {"sum_logits": -1.2201370000839233, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.2201370000839233, "logits_per_char": -0.6100685000419617, "num_chars": 2}, {"sum_logits": -1.8021100759506226, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.8021100759506226, "logits_per_char": -0.9010550379753113, "num_chars": 2}, {"sum_logits": -1.4702893495559692, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.4702893495559692, "logits_per_char": -0.7351446747779846, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 989, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8168797492980957, "incorrect_loss_raw": 1.3093568881352742, "correct_loss_per_char": 0.9084398746490479, "incorrect_loss_per_char": 0.6546784440676371, "correct_loss_per_token": 1.8168797492980957, "incorrect_loss_per_token": 1.3093568881352742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1182303428649902, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": true, "logits_per_token": -1.1182303428649902, "logits_per_char": -0.5591151714324951, "num_chars": 2}, {"sum_logits": -1.2002557516098022, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.2002557516098022, "logits_per_char": -0.6001278758049011, "num_chars": 2}, {"sum_logits": -1.8168797492980957, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.8168797492980957, "logits_per_char": -0.9084398746490479, "num_chars": 2}, {"sum_logits": -1.6095845699310303, "num_tokens": 1, "num_tokens_all": 1078, "is_greedy": false, "logits_per_token": -1.6095845699310303, "logits_per_char": -0.8047922849655151, "num_chars": 2}], "label": 2, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 990, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3697291612625122, "incorrect_loss_raw": 1.4253493547439575, "correct_loss_per_char": 0.6848645806312561, "incorrect_loss_per_char": 0.7126746773719788, "correct_loss_per_token": 1.3697291612625122, "incorrect_loss_per_token": 1.4253493547439575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.156821846961975, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -1.156821846961975, "logits_per_char": -0.5784109234809875, "num_chars": 2}, {"sum_logits": -1.4215292930603027, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.4215292930603027, "logits_per_char": -0.7107646465301514, "num_chars": 2}, {"sum_logits": -1.6976969242095947, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.6976969242095947, "logits_per_char": -0.8488484621047974, "num_chars": 2}, {"sum_logits": -1.3697291612625122, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.3697291612625122, "logits_per_char": -0.6848645806312561, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 991, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5374938249588013, "incorrect_loss_raw": 1.411831299463908, "correct_loss_per_char": 0.7687469124794006, "incorrect_loss_per_char": 0.705915649731954, "correct_loss_per_token": 1.5374938249588013, "incorrect_loss_per_token": 1.411831299463908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.131359577178955, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -1.131359577178955, "logits_per_char": -0.5656797885894775, "num_chars": 2}, {"sum_logits": -1.1788643598556519, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1788643598556519, "logits_per_char": -0.5894321799278259, "num_chars": 2}, {"sum_logits": -1.9252699613571167, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.9252699613571167, "logits_per_char": -0.9626349806785583, "num_chars": 2}, {"sum_logits": -1.5374938249588013, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5374938249588013, "logits_per_char": -0.7687469124794006, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 992, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4629805088043213, "incorrect_loss_raw": 1.4183155695597331, "correct_loss_per_char": 0.7314902544021606, "incorrect_loss_per_char": 0.7091577847798666, "correct_loss_per_token": 1.4629805088043213, "incorrect_loss_per_token": 1.4183155695597331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.035653829574585, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.035653829574585, "logits_per_char": -0.5178269147872925, "num_chars": 2}, {"sum_logits": -1.4629805088043213, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4629805088043213, "logits_per_char": -0.7314902544021606, "num_chars": 2}, {"sum_logits": -1.7857775688171387, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.7857775688171387, "logits_per_char": -0.8928887844085693, "num_chars": 2}, {"sum_logits": -1.4335153102874756, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4335153102874756, "logits_per_char": -0.7167576551437378, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 993, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.669682502746582, "incorrect_loss_raw": 1.3661915461222331, "correct_loss_per_char": 0.834841251373291, "incorrect_loss_per_char": 0.6830957730611166, "correct_loss_per_token": 1.669682502746582, "incorrect_loss_per_token": 1.3661915461222331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0266127586364746, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -1.0266127586364746, "logits_per_char": -0.5133063793182373, "num_chars": 2}, {"sum_logits": -1.3129982948303223, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.3129982948303223, "logits_per_char": -0.6564991474151611, "num_chars": 2}, {"sum_logits": -1.7589635848999023, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.7589635848999023, "logits_per_char": -0.8794817924499512, "num_chars": 2}, {"sum_logits": -1.669682502746582, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.669682502746582, "logits_per_char": -0.834841251373291, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 994, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1921424865722656, "incorrect_loss_raw": 1.5136959155400593, "correct_loss_per_char": 0.5960712432861328, "incorrect_loss_per_char": 0.7568479577700297, "correct_loss_per_token": 1.1921424865722656, "incorrect_loss_per_token": 1.5136959155400593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109665870666504, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -1.109665870666504, "logits_per_char": -0.554832935333252, "num_chars": 2}, {"sum_logits": -1.1921424865722656, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.1921424865722656, "logits_per_char": -0.5960712432861328, "num_chars": 2}, {"sum_logits": -1.702924370765686, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.702924370765686, "logits_per_char": -0.851462185382843, "num_chars": 2}, {"sum_logits": -1.7284975051879883, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.7284975051879883, "logits_per_char": -0.8642487525939941, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 995, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5678951740264893, "incorrect_loss_raw": 1.4076854785283406, "correct_loss_per_char": 0.7839475870132446, "incorrect_loss_per_char": 0.7038427392641703, "correct_loss_per_token": 1.5678951740264893, "incorrect_loss_per_token": 1.4076854785283406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.038690447807312, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -1.038690447807312, "logits_per_char": -0.519345223903656, "num_chars": 2}, {"sum_logits": -1.2647910118103027, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.2647910118103027, "logits_per_char": -0.6323955059051514, "num_chars": 2}, {"sum_logits": -1.9195749759674072, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.9195749759674072, "logits_per_char": -0.9597874879837036, "num_chars": 2}, {"sum_logits": -1.5678951740264893, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.5678951740264893, "logits_per_char": -0.7839475870132446, "num_chars": 2}], "label": 3, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 996, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.269236445426941, "incorrect_loss_raw": 1.501857320467631, "correct_loss_per_char": 0.6346182227134705, "incorrect_loss_per_char": 0.7509286602338155, "correct_loss_per_token": 1.269236445426941, "incorrect_loss_per_token": 1.501857320467631, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0482882261276245, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -1.0482882261276245, "logits_per_char": -0.5241441130638123, "num_chars": 2}, {"sum_logits": -1.269236445426941, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.269236445426941, "logits_per_char": -0.6346182227134705, "num_chars": 2}, {"sum_logits": -1.8689813613891602, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.8689813613891602, "logits_per_char": -0.9344906806945801, "num_chars": 2}, {"sum_logits": -1.5883023738861084, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.5883023738861084, "logits_per_char": -0.7941511869430542, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 997, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3750468492507935, "incorrect_loss_raw": 1.4297823111216228, "correct_loss_per_char": 0.6875234246253967, "incorrect_loss_per_char": 0.7148911555608114, "correct_loss_per_token": 1.3750468492507935, "incorrect_loss_per_token": 1.4297823111216228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1097580194473267, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -1.1097580194473267, "logits_per_char": -0.5548790097236633, "num_chars": 2}, {"sum_logits": -1.3750468492507935, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.3750468492507935, "logits_per_char": -0.6875234246253967, "num_chars": 2}, {"sum_logits": -1.7123664617538452, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.7123664617538452, "logits_per_char": -0.8561832308769226, "num_chars": 2}, {"sum_logits": -1.4672224521636963, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.4672224521636963, "logits_per_char": -0.7336112260818481, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 998, "native_id": null, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.223707675933838, "incorrect_loss_raw": 1.4855020840962727, "correct_loss_per_char": 0.611853837966919, "incorrect_loss_per_char": 0.7427510420481364, "correct_loss_per_token": 1.223707675933838, "incorrect_loss_per_token": 1.4855020840962727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3159409761428833, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.3159409761428833, "logits_per_char": -0.6579704880714417, "num_chars": 2}, {"sum_logits": -1.223707675933838, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -1.223707675933838, "logits_per_char": -0.611853837966919, "num_chars": 2}, {"sum_logits": -1.8018975257873535, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.8018975257873535, "logits_per_char": -0.9009487628936768, "num_chars": 2}, {"sum_logits": -1.3386677503585815, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.3386677503585815, "logits_per_char": -0.6693338751792908, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 999, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1539318561553955, "incorrect_loss_raw": 1.545491059621175, "correct_loss_per_char": 0.5769659280776978, "incorrect_loss_per_char": 0.7727455298105875, "correct_loss_per_token": 1.1539318561553955, "incorrect_loss_per_token": 1.545491059621175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.111760139465332, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -1.111760139465332, "logits_per_char": -0.555880069732666, "num_chars": 2}, {"sum_logits": -1.1539318561553955, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.1539318561553955, "logits_per_char": -0.5769659280776978, "num_chars": 2}, {"sum_logits": -1.8901124000549316, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.8901124000549316, "logits_per_char": -0.9450562000274658, "num_chars": 2}, {"sum_logits": -1.6346006393432617, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.6346006393432617, "logits_per_char": -0.8173003196716309, "num_chars": 2}], "label": 1, "task_hash": "75631579605ae5f677bf3e10716878f8", "model_hash": "03418cf8091a9882619950ffb07429a5"}