{"doc_id": 0, "native_id": "8-343", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.798331260681152, "incorrect_loss_raw": 16.896308581034344, "correct_loss_per_char": 0.7181059663945978, "incorrect_loss_per_char": 0.6561847253362296, "correct_loss_per_token": 3.949582815170288, "incorrect_loss_per_token": 3.781272617975871, "correct_loss_uncond": -13.96152400970459, "incorrect_loss_uncond": -11.182297070821127}, "model_output": [{"sum_logits": -12.508220672607422, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.788204193115234, "logits_per_token": -3.1270551681518555, "logits_per_char": -0.5685554851185192, "num_chars": 22}, {"sum_logits": -15.798331260681152, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -29.759855270385742, "logits_per_token": -3.949582815170288, "logits_per_char": -0.7181059663945978, "num_chars": 22}, {"sum_logits": -26.56827163696289, "num_tokens": 5, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -38.516231536865234, "logits_per_token": -5.3136543273925785, "logits_per_char": -0.9161472978263066, "num_chars": 29}, {"sum_logits": -11.612433433532715, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.931381225585938, "logits_per_token": -2.9031083583831787, "logits_per_char": -0.4838513930638631, "num_chars": 24}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": "1129", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.481931686401367, "incorrect_loss_raw": 9.831347942352295, "correct_loss_per_char": 1.435241460800171, "incorrect_loss_per_char": 1.0271111803022699, "correct_loss_per_token": 5.740965843200684, "incorrect_loss_per_token": 3.9238614241282144, "correct_loss_uncond": -4.837440490722656, "incorrect_loss_uncond": -7.002623081207275}, "model_output": [{"sum_logits": -11.481931686401367, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.319372177124023, "logits_per_token": -5.740965843200684, "logits_per_char": -1.435241460800171, "num_chars": 8}, {"sum_logits": -11.901750564575195, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -18.59776496887207, "logits_per_token": -2.975437641143799, "logits_per_char": -1.322416729397244, "num_chars": 9}, {"sum_logits": -9.691230773925781, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.89789581298828, "logits_per_token": -4.845615386962891, "logits_per_char": -0.8810209794477983, "num_chars": 11}, {"sum_logits": -7.901062488555908, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.00625228881836, "logits_per_token": -3.950531244277954, "logits_per_char": -0.8778958320617676, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": "880", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.837567329406738, "incorrect_loss_raw": 7.174546241760254, "correct_loss_per_char": 1.2296959161758423, "incorrect_loss_per_char": 1.1414747843666684, "correct_loss_per_token": 3.279189109802246, "incorrect_loss_per_token": 7.174546241760254, "correct_loss_uncond": -7.090764045715332, "incorrect_loss_uncond": -5.637444178263347}, "model_output": [{"sum_logits": -8.749526023864746, "num_tokens": 1, "num_tokens_all": 104, "is_greedy": false, "sum_logits_uncond": -14.098634719848633, "logits_per_token": -8.749526023864746, "logits_per_char": -1.458254337310791, "num_chars": 6}, {"sum_logits": -6.839648246765137, "num_tokens": 1, "num_tokens_all": 104, "is_greedy": false, "sum_logits_uncond": -12.13295841217041, "logits_per_token": -6.839648246765137, "logits_per_char": -0.9770926066807338, "num_chars": 7}, {"sum_logits": -9.837567329406738, "num_tokens": 3, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -16.92833137512207, "logits_per_token": -3.279189109802246, "logits_per_char": -1.2296959161758423, "num_chars": 8}, {"sum_logits": -5.934464454650879, "num_tokens": 1, "num_tokens_all": 104, "is_greedy": false, "sum_logits_uncond": -12.204378128051758, "logits_per_token": -5.934464454650879, "logits_per_char": -0.9890774091084799, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": "7-999", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.89640235900879, "incorrect_loss_raw": 19.68403434753418, "correct_loss_per_char": 0.9619449089313376, "incorrect_loss_per_char": 0.9430508080303359, "correct_loss_per_token": 5.5792804718017575, "incorrect_loss_per_token": 4.601308472951254, "correct_loss_uncond": -12.330240249633789, "incorrect_loss_uncond": -11.111185709635416}, "model_output": [{"sum_logits": -19.266935348510742, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -27.861066818237305, "logits_per_token": -4.8167338371276855, "logits_per_char": -1.0140492288689864, "num_chars": 19}, {"sum_logits": -19.1820068359375, "num_tokens": 5, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -32.208621978759766, "logits_per_token": -3.8364013671875, "logits_per_char": -0.834000297214674, "num_chars": 23}, {"sum_logits": -27.89640235900879, "num_tokens": 5, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -40.22664260864258, "logits_per_token": -5.5792804718017575, "logits_per_char": -0.9619449089313376, "num_chars": 29}, {"sum_logits": -20.603160858154297, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -32.31597137451172, "logits_per_token": -5.150790214538574, "logits_per_char": -0.9811028980073475, "num_chars": 21}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": "8-464", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.77810287475586, "incorrect_loss_raw": 7.73634401957194, "correct_loss_per_char": 0.44445922157981177, "incorrect_loss_per_char": 0.8993264966540866, "correct_loss_per_token": 4.88905143737793, "incorrect_loss_per_token": 5.8240383995903855, "correct_loss_uncond": -7.861026763916016, "incorrect_loss_uncond": -6.780950546264648}, "model_output": [{"sum_logits": -8.556612968444824, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.260916709899902, "logits_per_token": -8.556612968444824, "logits_per_char": -0.9507347742716471, "num_chars": 9}, {"sum_logits": -8.605375289916992, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.17884635925293, "logits_per_token": -2.8684584299723306, "logits_per_char": -0.537835955619812, "num_chars": 16}, {"sum_logits": -9.77810287475586, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.639129638671875, "logits_per_token": -4.88905143737793, "logits_per_char": -0.44445922157981177, "num_chars": 22}, {"sum_logits": -6.047043800354004, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.112120628356934, "logits_per_token": -6.047043800354004, "logits_per_char": -1.2094087600708008, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": "9-794", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4623374938964844, "incorrect_loss_raw": 5.3551249504089355, "correct_loss_per_char": 0.20890535627092635, "incorrect_loss_per_char": 0.8205799057370141, "correct_loss_per_token": 1.4623374938964844, "incorrect_loss_per_token": 5.3551249504089355, "correct_loss_uncond": -9.567106246948242, "incorrect_loss_uncond": -7.4889248212178545}, "model_output": [{"sum_logits": -4.371356964111328, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.376604080200195, "logits_per_token": -4.371356964111328, "logits_per_char": -0.6244795663016183, "num_chars": 7}, {"sum_logits": -5.642363548278809, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -13.184395790100098, "logits_per_token": -5.642363548278809, "logits_per_char": -0.6269292831420898, "num_chars": 9}, {"sum_logits": -1.4623374938964844, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": true, "sum_logits_uncond": -11.029443740844727, "logits_per_token": -1.4623374938964844, "logits_per_char": -0.20890535627092635, "num_chars": 7}, {"sum_logits": -6.05165433883667, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.971149444580078, "logits_per_token": -6.05165433883667, "logits_per_char": -1.210330867767334, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": "9-1163", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.957889556884766, "incorrect_loss_raw": 5.317884047826131, "correct_loss_per_char": 1.591577911376953, "incorrect_loss_per_char": 0.799825398127238, "correct_loss_per_token": 7.957889556884766, "incorrect_loss_per_token": 4.058289448420207, "correct_loss_uncond": -7.396228790283203, "incorrect_loss_uncond": -6.956809123357137}, "model_output": [{"sum_logits": -5.117912292480469, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.094867706298828, "logits_per_token": -5.117912292480469, "logits_per_char": -1.2794780731201172, "num_chars": 4}, {"sum_logits": -3.278172254562378, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.722733497619629, "logits_per_token": -3.278172254562378, "logits_per_char": -0.364241361618042, "num_chars": 9}, {"sum_logits": -7.957889556884766, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.354118347167969, "logits_per_token": -7.957889556884766, "logits_per_char": -1.591577911376953, "num_chars": 5}, {"sum_logits": -7.557567596435547, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.006478309631348, "logits_per_token": -3.7787837982177734, "logits_per_char": -0.7557567596435547, "num_chars": 10}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": "9-322", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.823802947998047, "incorrect_loss_raw": 16.136656443277996, "correct_loss_per_char": 1.2192637703635476, "incorrect_loss_per_char": 1.1300351112607925, "correct_loss_per_token": 8.941267649332682, "incorrect_loss_per_token": 7.260873635609944, "correct_loss_uncond": -4.090669631958008, "incorrect_loss_uncond": -4.355982144673665}, "model_output": [{"sum_logits": -14.53418254852295, "num_tokens": 3, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -24.209192276000977, "logits_per_token": -4.844727516174316, "logits_per_char": -1.0381558963230677, "num_chars": 14}, {"sum_logits": -26.823802947998047, "num_tokens": 3, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -30.914472579956055, "logits_per_token": -8.941267649332682, "logits_per_char": -1.2192637703635476, "num_chars": 22}, {"sum_logits": -14.22741985321045, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -13.115376472473145, "logits_per_token": -7.113709926605225, "logits_per_char": -0.9484946568806966, "num_chars": 15}, {"sum_logits": -19.648366928100586, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -24.15334701538086, "logits_per_token": -9.824183464050293, "logits_per_char": -1.4034547805786133, "num_chars": 14}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": "7-1140", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.722074508666992, "incorrect_loss_raw": 5.526426951090495, "correct_loss_per_char": 0.715259313583374, "incorrect_loss_per_char": 1.1350283039940727, "correct_loss_per_token": 5.722074508666992, "incorrect_loss_per_token": 5.526426951090495, "correct_loss_uncond": -5.234963417053223, "incorrect_loss_uncond": -4.327120463053386}, "model_output": [{"sum_logits": -5.546814918518066, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -8.620218276977539, "logits_per_token": -5.546814918518066, "logits_per_char": -1.3867037296295166, "num_chars": 4}, {"sum_logits": -5.389105796813965, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -10.634567260742188, "logits_per_token": -5.389105796813965, "logits_per_char": -1.0778211593627929, "num_chars": 5}, {"sum_logits": -5.643360137939453, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -10.305856704711914, "logits_per_token": -5.643360137939453, "logits_per_char": -0.9405600229899088, "num_chars": 6}, {"sum_logits": -5.722074508666992, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -10.957037925720215, "logits_per_token": -5.722074508666992, "logits_per_char": -0.715259313583374, "num_chars": 8}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": "7-903", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.096744537353516, "incorrect_loss_raw": 7.737703959147136, "correct_loss_per_char": 0.9354817526681083, "incorrect_loss_per_char": 0.711565113067627, "correct_loss_per_token": 6.548372268676758, "incorrect_loss_per_token": 7.737703959147136, "correct_loss_uncond": -10.11026382446289, "incorrect_loss_uncond": -5.810784975687663}, "model_output": [{"sum_logits": -7.723290920257568, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -14.832596778869629, "logits_per_token": -7.723290920257568, "logits_per_char": -0.5148860613505045, "num_chars": 15}, {"sum_logits": -13.096744537353516, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -23.207008361816406, "logits_per_token": -6.548372268676758, "logits_per_char": -0.9354817526681083, "num_chars": 14}, {"sum_logits": -9.115374565124512, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -14.19232177734375, "logits_per_token": -9.115374565124512, "logits_per_char": -0.9115374565124512, "num_chars": 10}, {"sum_logits": -6.374446392059326, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.620548248291016, "logits_per_token": -6.374446392059326, "logits_per_char": -0.7082718213399252, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": "7-511", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.58182144165039, "incorrect_loss_raw": 10.137641429901123, "correct_loss_per_char": 1.5116887773786272, "incorrect_loss_per_char": 1.3834023069681713, "correct_loss_per_token": 5.290910720825195, "incorrect_loss_per_token": 6.807335138320923, "correct_loss_uncond": -3.1087512969970703, "incorrect_loss_uncond": -4.742268403371175}, "model_output": [{"sum_logits": -6.565486431121826, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.231433868408203, "logits_per_token": -3.282743215560913, "logits_per_char": -0.9379266330174038, "num_chars": 7}, {"sum_logits": -10.062263488769531, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.09549903869629, "logits_per_token": -3.3540878295898438, "logits_per_char": -0.9147512262517755, "num_chars": 11}, {"sum_logits": -10.58182144165039, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.690572738647461, "logits_per_token": -5.290910720825195, "logits_per_char": -1.5116887773786272, "num_chars": 7}, {"sum_logits": -13.785174369812012, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.312796592712402, "logits_per_token": -13.785174369812012, "logits_per_char": -2.2975290616353354, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": "9-937", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.087921619415283, "incorrect_loss_raw": 6.264830430348714, "correct_loss_per_char": 0.7268459456307548, "incorrect_loss_per_char": 1.0116391938830178, "correct_loss_per_token": 5.087921619415283, "incorrect_loss_per_token": 6.264830430348714, "correct_loss_uncond": -8.093353748321533, "incorrect_loss_uncond": -7.3083427747090655}, "model_output": [{"sum_logits": -6.63177490234375, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.342708587646484, "logits_per_token": -6.63177490234375, "logits_per_char": -1.1052958170572917, "num_chars": 6}, {"sum_logits": -5.087921619415283, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.181275367736816, "logits_per_token": -5.087921619415283, "logits_per_char": -0.7268459456307548, "num_chars": 7}, {"sum_logits": -3.3615899085998535, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.676148414611816, "logits_per_token": -3.3615899085998535, "logits_per_char": -0.6723179817199707, "num_chars": 5}, {"sum_logits": -8.801126480102539, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.70066261291504, "logits_per_token": -8.801126480102539, "logits_per_char": -1.2573037828717912, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": "8-201", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.791596412658691, "incorrect_loss_raw": 5.4649637540181475, "correct_loss_per_char": 1.0989495515823364, "incorrect_loss_per_char": 0.7583968601529562, "correct_loss_per_token": 2.930532137552897, "incorrect_loss_per_token": 2.7324818770090737, "correct_loss_uncond": -8.689383506774902, "incorrect_loss_uncond": -9.411204814910889}, "model_output": [{"sum_logits": -6.114744186401367, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.03952980041504, "logits_per_token": -3.0573720932006836, "logits_per_char": -0.6794160207112631, "num_chars": 9}, {"sum_logits": -5.341649055480957, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -13.756900787353516, "logits_per_token": -2.6708245277404785, "logits_per_char": -0.8902748425801595, "num_chars": 6}, {"sum_logits": -8.791596412658691, "num_tokens": 3, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -17.480979919433594, "logits_per_token": -2.930532137552897, "logits_per_char": -1.0989495515823364, "num_chars": 8}, {"sum_logits": -4.938498020172119, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -14.832075119018555, "logits_per_token": -2.4692490100860596, "logits_per_char": -0.7054997171674456, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": "1618", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.646394729614258, "incorrect_loss_raw": 15.580127398173014, "correct_loss_per_char": 1.0403996706008911, "incorrect_loss_per_char": 0.8849640108377507, "correct_loss_per_token": 5.548798243204753, "incorrect_loss_per_token": 5.966518825954861, "correct_loss_uncond": -8.002714157104492, "incorrect_loss_uncond": -8.275649070739746}, "model_output": [{"sum_logits": -16.646394729614258, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.64910888671875, "logits_per_token": -5.548798243204753, "logits_per_char": -1.0403996706008911, "num_chars": 16}, {"sum_logits": -12.063733100891113, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -20.190601348876953, "logits_per_token": -4.021244366963704, "logits_per_char": -0.7539833188056946, "num_chars": 16}, {"sum_logits": -13.916574478149414, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -20.08193588256836, "logits_per_token": -6.958287239074707, "logits_per_char": -1.0705057290884166, "num_chars": 13}, {"sum_logits": -20.760074615478516, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.29479217529297, "logits_per_token": -6.920024871826172, "logits_per_char": -0.8304029846191406, "num_chars": 25}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": "758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.641498565673828, "incorrect_loss_raw": 22.29454771677653, "correct_loss_per_char": 0.7880499521891277, "incorrect_loss_per_char": 0.5416295337768633, "correct_loss_per_token": 3.377356937953404, "incorrect_loss_per_token": 2.7347621826898485, "correct_loss_uncond": -7.671764373779297, "incorrect_loss_uncond": -11.123655001322428}, "model_output": [{"sum_logits": -24.91931915283203, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -36.79258728027344, "logits_per_token": -3.5599027361188615, "logits_per_char": -0.6557715566534745, "num_chars": 38}, {"sum_logits": -9.834486961364746, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.920146942138672, "logits_per_token": -1.9668973922729491, "logits_per_char": -0.3391202400470602, "num_chars": 29}, {"sum_logits": -23.641498565673828, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -31.313262939453125, "logits_per_token": -3.377356937953404, "logits_per_char": -0.7880499521891277, "num_chars": 30}, {"sum_logits": -32.12983703613281, "num_tokens": 12, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -42.541873931884766, "logits_per_token": -2.6774864196777344, "logits_per_char": -0.6299968046300551, "num_chars": 51}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": "7-414", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.681730270385742, "incorrect_loss_raw": 16.014752705891926, "correct_loss_per_char": 0.4373010438064049, "incorrect_loss_per_char": 0.6015969714899172, "correct_loss_per_token": 2.5363460540771485, "incorrect_loss_per_token": 3.3620127042134604, "correct_loss_uncond": -21.79850959777832, "incorrect_loss_uncond": -13.122095743815104}, "model_output": [{"sum_logits": -17.750635147094727, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -28.195865631103516, "logits_per_token": -3.5501270294189453, "logits_per_char": -0.6120908671411974, "num_chars": 29}, {"sum_logits": -9.543729782104492, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -25.823871612548828, "logits_per_token": -2.385932445526123, "logits_per_char": -0.4771864891052246, "num_chars": 20}, {"sum_logits": -20.749893188476562, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -33.39080810546875, "logits_per_token": -4.1499786376953125, "logits_per_char": -0.7155135582233297, "num_chars": 29}, {"sum_logits": -12.681730270385742, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -34.48023986816406, "logits_per_token": -2.5363460540771485, "logits_per_char": -0.4373010438064049, "num_chars": 29}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": "9-675", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 21.24706268310547, "incorrect_loss_raw": 20.596444447835285, "correct_loss_per_char": 0.7869282475224247, "incorrect_loss_per_char": 1.64350178492473, "correct_loss_per_token": 7.082354227701823, "incorrect_loss_per_token": 10.298222223917643, "correct_loss_uncond": -10.052066802978516, "incorrect_loss_uncond": -5.509318033854167}, "model_output": [{"sum_logits": -24.52741050720215, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -28.128448486328125, "logits_per_token": -12.263705253601074, "logits_per_char": -2.2297645915638316, "num_chars": 11}, {"sum_logits": -21.402360916137695, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -26.494476318359375, "logits_per_token": -10.701180458068848, "logits_per_char": -1.2589624068316292, "num_chars": 17}, {"sum_logits": -21.24706268310547, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -31.299129486083984, "logits_per_token": -7.082354227701823, "logits_per_char": -0.7869282475224247, "num_chars": 27}, {"sum_logits": -15.859561920166016, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.69436264038086, "logits_per_token": -7.929780960083008, "logits_per_char": -1.4417783563787288, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": "9-163", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.569721221923828, "incorrect_loss_raw": 13.58422565460205, "correct_loss_per_char": 0.7549800872802734, "incorrect_loss_per_char": 0.9586825205650165, "correct_loss_per_token": 3.523240407307943, "incorrect_loss_per_token": 5.145683076646594, "correct_loss_uncond": -5.144881248474121, "incorrect_loss_uncond": -1.1047480901082356}, "model_output": [{"sum_logits": -11.116941452026367, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.49217700958252, "logits_per_token": -5.558470726013184, "logits_per_char": -0.794067246573312, "num_chars": 14}, {"sum_logits": -13.467904090881348, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.933377265930176, "logits_per_token": -4.489301363627116, "logits_per_char": -0.6121774586764249, "num_chars": 22}, {"sum_logits": -10.569721221923828, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.71460247039795, "logits_per_token": -3.523240407307943, "logits_per_char": -0.7549800872802734, "num_chars": 14}, {"sum_logits": -16.167831420898438, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.641366958618164, "logits_per_token": -5.3892771402994795, "logits_per_char": -1.4698028564453125, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": "1032", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.43233871459961, "incorrect_loss_raw": 14.993998209635416, "correct_loss_per_char": 2.3474769592285156, "incorrect_loss_per_char": 1.9830234928048533, "correct_loss_per_token": 5.47744623819987, "incorrect_loss_per_token": 6.602563116285537, "correct_loss_uncond": 0.1941070556640625, "incorrect_loss_uncond": 0.49463431040445965}, "model_output": [{"sum_logits": -16.43233871459961, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.238231658935547, "logits_per_token": -5.47744623819987, "logits_per_char": -2.3474769592285156, "num_chars": 7}, {"sum_logits": -13.786213874816895, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -14.37261962890625, "logits_per_token": -6.893106937408447, "logits_per_char": -1.969459124973842, "num_chars": 7}, {"sum_logits": -15.095932960510254, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.384912490844727, "logits_per_token": -7.547966480255127, "logits_per_char": -2.515988826751709, "num_chars": 6}, {"sum_logits": -16.0998477935791, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.740559577941895, "logits_per_token": -5.366615931193034, "logits_per_char": -1.4636225266890093, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": "889", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.23222827911377, "incorrect_loss_raw": 11.551811854044596, "correct_loss_per_char": 2.246445655822754, "incorrect_loss_per_char": 2.2342062574444395, "correct_loss_per_token": 11.23222827911377, "incorrect_loss_per_token": 8.469863096872965, "correct_loss_uncond": -2.3002986907958984, "incorrect_loss_uncond": -2.2057116826375327}, "model_output": [{"sum_logits": -11.679017066955566, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.125970840454102, "logits_per_token": -11.679017066955566, "logits_per_char": -2.9197542667388916, "num_chars": 4}, {"sum_logits": -11.23222827911377, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.532526969909668, "logits_per_token": -11.23222827911377, "logits_per_char": -2.246445655822754, "num_chars": 5}, {"sum_logits": -12.327795028686523, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.873926162719727, "logits_per_token": -3.081948757171631, "logits_per_char": -1.1207086389715022, "num_chars": 11}, {"sum_logits": -10.6486234664917, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.272673606872559, "logits_per_token": -10.6486234664917, "logits_per_char": -2.662155866622925, "num_chars": 4}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": "1160", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.943181991577148, "incorrect_loss_raw": 8.070032278696695, "correct_loss_per_char": 1.8238636652628581, "incorrect_loss_per_char": 1.0627993600709098, "correct_loss_per_token": 5.471590995788574, "incorrect_loss_per_token": 8.070032278696695, "correct_loss_uncond": -1.3422060012817383, "incorrect_loss_uncond": -4.421484470367432}, "model_output": [{"sum_logits": -10.943181991577148, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -12.285387992858887, "logits_per_token": -5.471590995788574, "logits_per_char": -1.8238636652628581, "num_chars": 6}, {"sum_logits": -9.079614639282227, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.813969612121582, "logits_per_token": -9.079614639282227, "logits_per_char": -1.2970878056117467, "num_chars": 7}, {"sum_logits": -10.312981605529785, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.716553688049316, "logits_per_token": -10.312981605529785, "logits_per_char": -1.2891227006912231, "num_chars": 8}, {"sum_logits": -4.817500591278076, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -11.944026947021484, "logits_per_token": -4.817500591278076, "logits_per_char": -0.6021875739097595, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": "9-298", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.115965843200684, "incorrect_loss_raw": 6.354333718617757, "correct_loss_per_char": 0.45732953813340926, "incorrect_loss_per_char": 1.1795046289761861, "correct_loss_per_token": 4.115965843200684, "incorrect_loss_per_token": 6.354333718617757, "correct_loss_uncond": -9.144950866699219, "incorrect_loss_uncond": -6.430652777353923}, "model_output": [{"sum_logits": -4.115965843200684, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.260916709899902, "logits_per_token": -4.115965843200684, "logits_per_char": -0.45732953813340926, "num_chars": 9}, {"sum_logits": -4.632349014282227, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -4.632349014282227, "logits_per_char": -1.1580872535705566, "num_chars": 4}, {"sum_logits": -7.687934875488281, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.236160278320312, "logits_per_token": -7.687934875488281, "logits_per_char": -1.5375869750976563, "num_chars": 5}, {"sum_logits": -6.742717266082764, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.946208000183105, "logits_per_token": -6.742717266082764, "logits_per_char": -0.8428396582603455, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": "1189", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.098850727081299, "incorrect_loss_raw": 5.740141073862712, "correct_loss_per_char": 0.5460654405447153, "incorrect_loss_per_char": 0.7480449651284191, "correct_loss_per_token": 3.5494253635406494, "incorrect_loss_per_token": 5.740141073862712, "correct_loss_uncond": -12.50675916671753, "incorrect_loss_uncond": -6.649556001027425}, "model_output": [{"sum_logits": -7.098850727081299, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.605609893798828, "logits_per_token": -3.5494253635406494, "logits_per_char": -0.5460654405447153, "num_chars": 13}, {"sum_logits": -4.7874555587768555, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.184250831604004, "logits_per_token": -4.7874555587768555, "logits_per_char": -0.6839222226824079, "num_chars": 7}, {"sum_logits": -6.801655292510986, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.41329574584961, "logits_per_token": -6.801655292510986, "logits_per_char": -0.7557394769456651, "num_chars": 9}, {"sum_logits": -5.631312370300293, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.571544647216797, "logits_per_token": -5.631312370300293, "logits_per_char": -0.8044731957571847, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": "8-395", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.238344192504883, "incorrect_loss_raw": 24.27900759379069, "correct_loss_per_char": 0.7782384055001396, "incorrect_loss_per_char": 0.7025617960667495, "correct_loss_per_token": 3.8911920275006975, "incorrect_loss_per_token": 3.6191029208047047, "correct_loss_uncond": -3.8310070037841797, "incorrect_loss_uncond": -7.530866622924805}, "model_output": [{"sum_logits": -23.617610931396484, "num_tokens": 7, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.032146453857422, "logits_per_token": -3.373944418770926, "logits_per_char": -0.6383138089566618, "num_chars": 37}, {"sum_logits": -27.238344192504883, "num_tokens": 7, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.069351196289062, "logits_per_token": -3.8911920275006975, "logits_per_char": -0.7782384055001396, "num_chars": 35}, {"sum_logits": -17.745838165283203, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.94793701171875, "logits_per_token": -3.5491676330566406, "logits_per_char": -0.6825322371262771, "num_chars": 26}, {"sum_logits": -31.473573684692383, "num_tokens": 8, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -40.44953918457031, "logits_per_token": -3.934196710586548, "logits_per_char": -0.7868393421173095, "num_chars": 40}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": "7-238", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 35.384151458740234, "incorrect_loss_raw": 18.351330757141113, "correct_loss_per_char": 1.0722470139012192, "incorrect_loss_per_char": 0.8265849484337702, "correct_loss_per_token": 5.897358576456706, "incorrect_loss_per_token": 3.4358166694641112, "correct_loss_uncond": -8.296905517578125, "incorrect_loss_uncond": -8.02960173288981}, "model_output": [{"sum_logits": -24.986940383911133, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -33.864505767822266, "logits_per_token": -3.5695629119873047, "logits_per_char": -0.7349100112915039, "num_chars": 34}, {"sum_logits": -35.384151458740234, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -43.68105697631836, "logits_per_token": -5.897358576456706, "logits_per_char": -1.0722470139012192, "num_chars": 33}, {"sum_logits": -14.489534378051758, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -25.407936096191406, "logits_per_token": -3.6223835945129395, "logits_per_char": -0.9659689585367839, "num_chars": 15}, {"sum_logits": -15.57751750946045, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -19.8703556060791, "logits_per_token": -3.11550350189209, "logits_per_char": -0.7788758754730225, "num_chars": 20}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": "7-372", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.163585186004639, "incorrect_loss_raw": 6.097134908040364, "correct_loss_per_char": 0.41635851860046386, "incorrect_loss_per_char": 0.6289798012486211, "correct_loss_per_token": 1.387861728668213, "incorrect_loss_per_token": 2.6809316741095652, "correct_loss_uncond": -20.916164875030518, "incorrect_loss_uncond": -15.545368194580078}, "model_output": [{"sum_logits": -6.617444038391113, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -24.497135162353516, "logits_per_token": -2.2058146794637046, "logits_per_char": -0.6617444038391114, "num_chars": 10}, {"sum_logits": -4.163585186004639, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -25.079750061035156, "logits_per_token": -1.387861728668213, "logits_per_char": -0.41635851860046386, "num_chars": 10}, {"sum_logits": -5.201903820037842, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -19.939292907714844, "logits_per_token": -2.600951910018921, "logits_per_char": -0.577989313337538, "num_chars": 9}, {"sum_logits": -6.472056865692139, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -20.49108123779297, "logits_per_token": -3.2360284328460693, "logits_per_char": -0.6472056865692138, "num_chars": 10}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": "8-35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.250999450683594, "incorrect_loss_raw": 22.12046496073405, "correct_loss_per_char": 0.973249980381557, "incorrect_loss_per_char": 0.788376845952724, "correct_loss_per_token": 4.541833241780599, "incorrect_loss_per_token": 3.8951008054945206, "correct_loss_uncond": -10.24908447265625, "incorrect_loss_uncond": -10.291055043538412}, "model_output": [{"sum_logits": -23.47369956970215, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -34.27033996582031, "logits_per_token": -3.912283261617025, "logits_per_char": -0.8383464132036481, "num_chars": 28}, {"sum_logits": -18.752098083496094, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -30.064279556274414, "logits_per_token": -3.7504196166992188, "logits_per_char": -0.694522151240596, "num_chars": 27}, {"sum_logits": -24.135597229003906, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -32.899940490722656, "logits_per_token": -4.022599538167317, "logits_per_char": -0.8322619734139278, "num_chars": 29}, {"sum_logits": -27.250999450683594, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -37.500083923339844, "logits_per_token": -4.541833241780599, "logits_per_char": -0.973249980381557, "num_chars": 28}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": "9-271", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.742868900299072, "incorrect_loss_raw": 10.410080591837565, "correct_loss_per_char": 0.2931682130564814, "incorrect_loss_per_char": 0.5217476639222188, "correct_loss_per_token": 1.685717225074768, "incorrect_loss_per_token": 3.4700268639458547, "correct_loss_uncond": -9.997449398040771, "incorrect_loss_uncond": -6.228944778442383}, "model_output": [{"sum_logits": -6.742868900299072, "num_tokens": 4, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -16.740318298339844, "logits_per_token": -1.685717225074768, "logits_per_char": -0.2931682130564814, "num_chars": 23}, {"sum_logits": -6.175650596618652, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -15.809520721435547, "logits_per_token": -2.0585501988728843, "logits_per_char": -0.29407859983898343, "num_chars": 21}, {"sum_logits": -15.055883407592773, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -17.233901977539062, "logits_per_token": -5.018627802530925, "logits_per_char": -0.8364379670884874, "num_chars": 18}, {"sum_logits": -9.99870777130127, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.873653411865234, "logits_per_token": -3.3329025904337564, "logits_per_char": -0.4347264248391856, "num_chars": 23}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": "9-409", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.153411865234375, "incorrect_loss_raw": 8.36428133646647, "correct_loss_per_char": 0.743560791015625, "incorrect_loss_per_char": 1.1877617129573117, "correct_loss_per_token": 5.5767059326171875, "incorrect_loss_per_token": 8.36428133646647, "correct_loss_uncond": -8.079774856567383, "incorrect_loss_uncond": -5.311646143595378}, "model_output": [{"sum_logits": -7.590804100036621, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -14.0293607711792, "logits_per_token": -7.590804100036621, "logits_per_char": -1.5181608200073242, "num_chars": 5}, {"sum_logits": -10.269408226013184, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.6660795211792, "logits_per_token": -10.269408226013184, "logits_per_char": -1.1410453584459093, "num_chars": 9}, {"sum_logits": -7.232631683349609, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.332342147827148, "logits_per_token": -7.232631683349609, "logits_per_char": -0.9040789604187012, "num_chars": 8}, {"sum_logits": -11.153411865234375, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -19.233186721801758, "logits_per_token": -5.5767059326171875, "logits_per_char": -0.743560791015625, "num_chars": 15}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": "530", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.7797598838806152, "incorrect_loss_raw": 5.0205932060877485, "correct_loss_per_char": 0.3971085548400879, "incorrect_loss_per_char": 0.7850260739288633, "correct_loss_per_token": 2.7797598838806152, "incorrect_loss_per_token": 5.0205932060877485, "correct_loss_uncond": -9.740970134735107, "incorrect_loss_uncond": -7.653271158536275}, "model_output": [{"sum_logits": -7.073792934417725, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.104264259338379, "logits_per_token": -7.073792934417725, "logits_per_char": -1.1789654890696208, "num_chars": 6}, {"sum_logits": -2.7797598838806152, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.520730018615723, "logits_per_token": -2.7797598838806152, "logits_per_char": -0.3971085548400879, "num_chars": 7}, {"sum_logits": -0.32640326023101807, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": true, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -0.32640326023101807, "logits_per_char": -0.08160081505775452, "num_chars": 4}, {"sum_logits": -7.661583423614502, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.74473762512207, "logits_per_token": -7.661583423614502, "logits_per_char": -1.0945119176592146, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": "1426", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.49294662475586, "incorrect_loss_raw": 14.696477890014648, "correct_loss_per_char": 0.8154182434082031, "incorrect_loss_per_char": 1.2273185176766794, "correct_loss_per_token": 3.873236656188965, "incorrect_loss_per_token": 7.730680783589681, "correct_loss_uncond": -10.335676193237305, "incorrect_loss_uncond": -5.3333314259847}, "model_output": [{"sum_logits": -12.002012252807617, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -14.556058883666992, "logits_per_token": -6.001006126403809, "logits_per_char": -1.2002012252807617, "num_chars": 10}, {"sum_logits": -9.742843627929688, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -14.95370101928711, "logits_per_token": -9.742843627929688, "logits_per_char": -0.8857130570845171, "num_chars": 11}, {"sum_logits": -15.49294662475586, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -25.828622817993164, "logits_per_token": -3.873236656188965, "logits_per_char": -0.8154182434082031, "num_chars": 19}, {"sum_logits": -22.34457778930664, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -30.579668045043945, "logits_per_token": -7.448192596435547, "logits_per_char": -1.59604127066476, "num_chars": 14}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": "8-466", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 37.14653778076172, "incorrect_loss_raw": 25.73596954345703, "correct_loss_per_char": 0.952475327711839, "incorrect_loss_per_char": 0.911092037159008, "correct_loss_per_token": 4.643317222595215, "incorrect_loss_per_token": 4.434611456734793, "correct_loss_uncond": 2.4658775329589844, "incorrect_loss_uncond": -4.314613342285156}, "model_output": [{"sum_logits": -23.88839340209961, "num_tokens": 7, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -31.474149703979492, "logits_per_token": -3.412627628871373, "logits_per_char": -0.6825255257742745, "num_chars": 35}, {"sum_logits": -30.138626098632812, "num_tokens": 5, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.386043548583984, "logits_per_token": -6.0277252197265625, "logits_per_char": -1.1591779268704927, "num_chars": 26}, {"sum_logits": -23.180889129638672, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -23.291555404663086, "logits_per_token": -3.8634815216064453, "logits_per_char": -0.8915726588322566, "num_chars": 26}, {"sum_logits": -37.14653778076172, "num_tokens": 8, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -34.680660247802734, "logits_per_token": -4.643317222595215, "logits_per_char": -0.952475327711839, "num_chars": 39}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": "1577", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.557315826416016, "incorrect_loss_raw": 10.477371215820312, "correct_loss_per_char": 1.6510451180594308, "incorrect_loss_per_char": 1.5428131002597707, "correct_loss_per_token": 5.778657913208008, "incorrect_loss_per_token": 4.507419056362576, "correct_loss_uncond": -4.2574310302734375, "incorrect_loss_uncond": -5.397995630900065}, "model_output": [{"sum_logits": -8.539246559143066, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.271268844604492, "logits_per_token": -4.269623279571533, "logits_per_char": -1.2198923655918665, "num_chars": 7}, {"sum_logits": -11.557315826416016, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.814746856689453, "logits_per_token": -5.778657913208008, "logits_per_char": -1.6510451180594308, "num_chars": 7}, {"sum_logits": -9.730069160461426, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.661674499511719, "logits_per_token": -4.865034580230713, "logits_per_char": -1.946013832092285, "num_chars": 5}, {"sum_logits": -13.162797927856445, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -16.693157196044922, "logits_per_token": -4.3875993092854815, "logits_per_char": -1.4625331030951605, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": "8-257", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.30961799621582, "incorrect_loss_raw": 9.146713256835938, "correct_loss_per_char": 0.4404777096163842, "incorrect_loss_per_char": 0.3370444886777042, "correct_loss_per_token": 2.730961799621582, "incorrect_loss_per_token": 1.6618598680647592, "correct_loss_uncond": -6.942255020141602, "incorrect_loss_uncond": -7.407662709554036}, "model_output": [{"sum_logits": -27.30961799621582, "num_tokens": 10, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -34.25187301635742, "logits_per_token": -2.730961799621582, "logits_per_char": -0.4404777096163842, "num_chars": 62}, {"sum_logits": -7.365787506103516, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.224950790405273, "logits_per_token": -1.841446876525879, "logits_per_char": -0.38767302663702713, "num_chars": 19}, {"sum_logits": -8.466891288757324, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -15.368913650512695, "logits_per_token": -1.2095558983939034, "logits_per_char": -0.2490262143752154, "num_chars": 34}, {"sum_logits": -11.607460975646973, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.069263458251953, "logits_per_token": -1.9345768292744954, "logits_per_char": -0.3744342250208701, "num_chars": 31}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": "378", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.081960678100586, "incorrect_loss_raw": 25.12961260477702, "correct_loss_per_char": 0.7082929611206055, "incorrect_loss_per_char": 0.7039815045564805, "correct_loss_per_token": 4.013660113016765, "incorrect_loss_per_token": 3.7150566948784722, "correct_loss_uncond": -11.26689338684082, "incorrect_loss_uncond": -13.678062438964844}, "model_output": [{"sum_logits": -20.17792320251465, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -31.28693962097168, "logits_per_token": -4.03558464050293, "logits_per_char": -0.8071169281005859, "num_chars": 25}, {"sum_logits": -24.081960678100586, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -35.348854064941406, "logits_per_token": -4.013660113016765, "logits_per_char": -0.7082929611206055, "num_chars": 34}, {"sum_logits": -17.550708770751953, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -33.95355987548828, "logits_per_token": -2.9251181284586587, "logits_per_char": -0.40815601792446404, "num_chars": 43}, {"sum_logits": -37.66020584106445, "num_tokens": 9, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -51.182525634765625, "logits_per_token": -4.184467315673828, "logits_per_char": -0.8966715676443917, "num_chars": 42}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": "8-41", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.040252685546875, "incorrect_loss_raw": 10.098430474599203, "correct_loss_per_char": 0.9314466203962054, "incorrect_loss_per_char": 1.2090723209131777, "correct_loss_per_token": 6.5201263427734375, "incorrect_loss_per_token": 5.0492152372996015, "correct_loss_uncond": -5.458713531494141, "incorrect_loss_uncond": -2.9404753049214682}, "model_output": [{"sum_logits": -13.040252685546875, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.498966217041016, "logits_per_token": -6.5201263427734375, "logits_per_char": -0.9314466203962054, "num_chars": 14}, {"sum_logits": -5.357978343963623, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.817448616027832, "logits_per_token": -2.6789891719818115, "logits_per_char": -0.8929963906606039, "num_chars": 6}, {"sum_logits": -8.976848602294922, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.189876556396484, "logits_per_token": -4.488424301147461, "logits_per_char": -1.7953697204589845, "num_chars": 5}, {"sum_logits": -15.960464477539062, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.109392166137695, "logits_per_token": -7.980232238769531, "logits_per_char": -0.9388508516199449, "num_chars": 17}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": "9-540", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.970958709716797, "incorrect_loss_raw": 20.301669120788574, "correct_loss_per_char": 0.6308147028872841, "incorrect_loss_per_char": 0.6999530803796018, "correct_loss_per_token": 3.4244226728166853, "incorrect_loss_per_token": 3.2761708736419677, "correct_loss_uncond": -16.962509155273438, "incorrect_loss_uncond": -15.768471717834473}, "model_output": [{"sum_logits": -23.970958709716797, "num_tokens": 7, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -40.933467864990234, "logits_per_token": -3.4244226728166853, "logits_per_char": -0.6308147028872841, "num_chars": 38}, {"sum_logits": -12.509575843811035, "num_tokens": 5, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -28.80228042602539, "logits_per_token": -2.501915168762207, "logits_per_char": -0.5003830337524414, "num_chars": 25}, {"sum_logits": -31.366518020629883, "num_tokens": 8, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -47.34712219238281, "logits_per_token": -3.9208147525787354, "logits_per_char": -0.825434684753418, "num_chars": 38}, {"sum_logits": -17.028913497924805, "num_tokens": 5, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -32.06101989746094, "logits_per_token": -3.405782699584961, "logits_per_char": -0.7740415226329457, "num_chars": 22}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": "266", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.322784423828125, "incorrect_loss_raw": 12.001676241556803, "correct_loss_per_char": 1.046112060546875, "incorrect_loss_per_char": 1.7562179338364377, "correct_loss_per_token": 3.6613922119140625, "incorrect_loss_per_token": 6.94247563680013, "correct_loss_uncond": -7.244015693664551, "incorrect_loss_uncond": -4.211686770121257}, "model_output": [{"sum_logits": -5.649825096130371, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.251566886901855, "logits_per_token": -5.649825096130371, "logits_per_char": -0.8071178708757673, "num_chars": 7}, {"sum_logits": -16.01125144958496, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.059367179870605, "logits_per_token": -8.00562572479248, "logits_per_char": -2.66854190826416, "num_chars": 6}, {"sum_logits": -14.343952178955078, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.32915496826172, "logits_per_token": -7.171976089477539, "logits_per_char": -1.7929940223693848, "num_chars": 8}, {"sum_logits": -7.322784423828125, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.566800117492676, "logits_per_token": -3.6613922119140625, "logits_per_char": -1.046112060546875, "num_chars": 7}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": "1309", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3097164630889893, "incorrect_loss_raw": 8.004405657450357, "correct_loss_per_char": 0.2545935740837684, "incorrect_loss_per_char": 1.041622835492331, "correct_loss_per_token": 1.6548582315444946, "incorrect_loss_per_token": 6.620571613311768, "correct_loss_uncond": -11.341759443283081, "incorrect_loss_uncond": -5.403251012166341}, "model_output": [{"sum_logits": -7.787526607513428, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.304662704467773, "logits_per_token": -7.787526607513428, "logits_per_char": -1.1125038010733468, "num_chars": 7}, {"sum_logits": -8.303004264831543, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -14.325052261352539, "logits_per_token": -4.1515021324157715, "logits_per_char": -0.6919170220692953, "num_chars": 12}, {"sum_logits": -7.9226861000061035, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.593255043029785, "logits_per_token": -7.9226861000061035, "logits_per_char": -1.3204476833343506, "num_chars": 6}, {"sum_logits": -3.3097164630889893, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": true, "sum_logits_uncond": -14.65147590637207, "logits_per_token": -1.6548582315444946, "logits_per_char": -0.2545935740837684, "num_chars": 13}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": "7-1197", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.5419182777404785, "incorrect_loss_raw": 4.674737294514974, "correct_loss_per_char": 0.19553217521080604, "incorrect_loss_per_char": 0.8454063415527343, "correct_loss_per_token": 2.5419182777404785, "incorrect_loss_per_token": 4.674737294514974, "correct_loss_uncond": -10.660192966461182, "incorrect_loss_uncond": -8.853817303975424}, "model_output": [{"sum_logits": -2.5419182777404785, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.20211124420166, "logits_per_token": -2.5419182777404785, "logits_per_char": -0.19553217521080604, "num_chars": 13}, {"sum_logits": -6.688876152038574, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.509245872497559, "logits_per_token": -6.688876152038574, "logits_per_char": -1.3377752304077148, "num_chars": 5}, {"sum_logits": -4.700908660888672, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.275960922241211, "logits_per_token": -4.700908660888672, "logits_per_char": -0.6715583801269531, "num_chars": 7}, {"sum_logits": -2.634427070617676, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.800457000732422, "logits_per_token": -2.634427070617676, "logits_per_char": -0.5268854141235352, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": "7-891", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.127843856811523, "incorrect_loss_raw": 6.662937482198079, "correct_loss_per_char": 0.5704902410507202, "incorrect_loss_per_char": 0.7429819750407386, "correct_loss_per_token": 3.0426146189371743, "incorrect_loss_per_token": 5.880050500233968, "correct_loss_uncond": -14.785909652709961, "incorrect_loss_uncond": -9.034671465555826}, "model_output": [{"sum_logits": -5.829536437988281, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -14.837685585021973, "logits_per_token": -5.829536437988281, "logits_per_char": -0.4857947031656901, "num_chars": 12}, {"sum_logits": -9.461954116821289, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.60252857208252, "logits_per_token": -9.461954116821289, "logits_per_char": -1.3517077309744698, "num_chars": 7}, {"sum_logits": -4.697321891784668, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -16.652612686157227, "logits_per_token": -2.348660945892334, "logits_per_char": -0.39144349098205566, "num_chars": 12}, {"sum_logits": -9.127843856811523, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.913753509521484, "logits_per_token": -3.0426146189371743, "logits_per_char": -0.5704902410507202, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": "1180", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.262944221496582, "incorrect_loss_raw": 8.905987103780111, "correct_loss_per_char": 0.8841962814331055, "incorrect_loss_per_char": 1.0834860915229434, "correct_loss_per_token": 4.420981407165527, "incorrect_loss_per_token": 7.143874486287435, "correct_loss_uncond": -6.44901180267334, "incorrect_loss_uncond": -2.2576796213785806}, "model_output": [{"sum_logits": -7.535115718841553, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.944474220275879, "logits_per_token": -7.535115718841553, "logits_per_char": -1.0764451026916504, "num_chars": 7}, {"sum_logits": -7.929506778717041, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.190885543823242, "logits_per_token": -2.6431689262390137, "logits_per_char": -0.5663933413369315, "num_chars": 14}, {"sum_logits": -11.253338813781738, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -10.355640411376953, "logits_per_token": -11.253338813781738, "logits_per_char": -1.6076198305402483, "num_chars": 7}, {"sum_logits": -13.262944221496582, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -19.711956024169922, "logits_per_token": -4.420981407165527, "logits_per_char": -0.8841962814331055, "num_chars": 15}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": "1204", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.893390655517578, "incorrect_loss_raw": 23.348604838053387, "correct_loss_per_char": 0.39695401872907365, "incorrect_loss_per_char": 0.6756406905719206, "correct_loss_per_token": 2.3155651092529297, "incorrect_loss_per_token": 3.33551497686477, "correct_loss_uncond": -14.476266860961914, "incorrect_loss_uncond": -8.433220545450846}, "model_output": [{"sum_logits": -13.893390655517578, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.369657516479492, "logits_per_token": -2.3155651092529297, "logits_per_char": -0.39695401872907365, "num_chars": 35}, {"sum_logits": -27.041095733642578, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -34.52988815307617, "logits_per_token": -3.863013676234654, "logits_per_char": -0.8194271434437145, "num_chars": 33}, {"sum_logits": -24.04865837097168, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.11238098144531, "logits_per_token": -3.4355226244245256, "logits_per_char": -0.6499637397559913, "num_chars": 37}, {"sum_logits": -18.9560604095459, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -25.70320701599121, "logits_per_token": -2.7080086299351285, "logits_per_char": -0.5575311885160559, "num_chars": 34}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": "7-52", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.738701820373535, "incorrect_loss_raw": 17.975755373636883, "correct_loss_per_char": 1.2483859743390764, "incorrect_loss_per_char": 1.4299270104961235, "correct_loss_per_token": 4.369350910186768, "incorrect_loss_per_token": 7.872726387447781, "correct_loss_uncond": -8.877655982971191, "incorrect_loss_uncond": -3.8771066665649414}, "model_output": [{"sum_logits": -20.072723388671875, "num_tokens": 3, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -23.492473602294922, "logits_per_token": -6.690907796223958, "logits_per_char": -1.1807484346277572, "num_chars": 17}, {"sum_logits": -21.495670318603516, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -26.024045944213867, "logits_per_token": -10.747835159301758, "logits_per_char": -1.3434793949127197, "num_chars": 16}, {"sum_logits": -12.358872413635254, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -16.04206657409668, "logits_per_token": -6.179436206817627, "logits_per_char": -1.7655532019478934, "num_chars": 7}, {"sum_logits": -8.738701820373535, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -17.616357803344727, "logits_per_token": -4.369350910186768, "logits_per_char": -1.2483859743390764, "num_chars": 7}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": "1759", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.934423446655273, "incorrect_loss_raw": 23.424781163533527, "correct_loss_per_char": 0.790842187815699, "incorrect_loss_per_char": 0.7568237678483984, "correct_loss_per_token": 3.822403907775879, "incorrect_loss_per_token": 3.728366506667365, "correct_loss_uncond": -15.556497573852539, "incorrect_loss_uncond": -12.862462361653646}, "model_output": [{"sum_logits": -22.934423446655273, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -38.49092102050781, "logits_per_token": -3.822403907775879, "logits_per_char": -0.790842187815699, "num_chars": 29}, {"sum_logits": -27.77897071838379, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -36.498435974121094, "logits_per_token": -3.9684243883405412, "logits_per_char": -0.6944742679595948, "num_chars": 40}, {"sum_logits": -22.44198989868164, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -42.77164840698242, "logits_per_token": -3.20599855695452, "logits_per_char": -0.7738617206441945, "num_chars": 29}, {"sum_logits": -20.053382873535156, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -29.591646194458008, "logits_per_token": -4.010676574707031, "logits_per_char": -0.8021353149414062, "num_chars": 25}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": "9-655", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 26.308448791503906, "incorrect_loss_raw": 19.81149673461914, "correct_loss_per_char": 0.6923275997764186, "incorrect_loss_per_char": 0.5655664962435526, "correct_loss_per_token": 3.758349827357701, "incorrect_loss_per_token": 3.002076572842068, "correct_loss_uncond": -15.188896179199219, "incorrect_loss_uncond": -13.321096420288086}, "model_output": [{"sum_logits": -26.308448791503906, "num_tokens": 7, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -41.497344970703125, "logits_per_token": -3.758349827357701, "logits_per_char": -0.6923275997764186, "num_chars": 38}, {"sum_logits": -21.58844757080078, "num_tokens": 8, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -36.48777770996094, "logits_per_token": -2.6985559463500977, "logits_per_char": -0.5140106564476377, "num_chars": 42}, {"sum_logits": -20.228195190429688, "num_tokens": 6, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -33.03603744506836, "logits_per_token": -3.3713658650716147, "logits_per_char": -0.6321310997009277, "num_chars": 32}, {"sum_logits": -17.617847442626953, "num_tokens": 6, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -29.873964309692383, "logits_per_token": -2.936307907104492, "logits_per_char": -0.5505577325820923, "num_chars": 32}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": "132", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.762186050415039, "incorrect_loss_raw": 8.702167987823486, "correct_loss_per_char": 0.36013662815093994, "incorrect_loss_per_char": 0.5487343937742944, "correct_loss_per_token": 1.9207286834716797, "incorrect_loss_per_token": 3.5177287525600858, "correct_loss_uncond": -13.603328704833984, "incorrect_loss_uncond": -9.476770559946695}, "model_output": [{"sum_logits": -5.762186050415039, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.365514755249023, "logits_per_token": -1.9207286834716797, "logits_per_char": -0.36013662815093994, "num_chars": 16}, {"sum_logits": -6.114257335662842, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.538177490234375, "logits_per_token": -2.0380857785542807, "logits_per_char": -0.3596621962154613, "num_chars": 17}, {"sum_logits": -11.106109619140625, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.368156433105469, "logits_per_token": -5.5530548095703125, "logits_per_char": -0.6941318511962891, "num_chars": 16}, {"sum_logits": -8.886137008666992, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.630481719970703, "logits_per_token": -2.962045669555664, "logits_per_char": -0.5924091339111328, "num_chars": 15}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": "8-79", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.761826515197754, "incorrect_loss_raw": 6.42547067006429, "correct_loss_per_char": 0.39454664502825054, "incorrect_loss_per_char": 1.162622660682315, "correct_loss_per_token": 2.761826515197754, "incorrect_loss_per_token": 6.42547067006429, "correct_loss_uncond": -9.645588874816895, "incorrect_loss_uncond": -7.074474016825358}, "model_output": [{"sum_logits": -2.761826515197754, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.407415390014648, "logits_per_token": -2.761826515197754, "logits_per_char": -0.39454664502825054, "num_chars": 7}, {"sum_logits": -7.130558490753174, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.137742042541504, "logits_per_token": -7.130558490753174, "logits_per_char": -1.4261116981506348, "num_chars": 5}, {"sum_logits": -6.429752349853516, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.554417610168457, "logits_per_token": -6.429752349853516, "logits_per_char": -0.9185360499790737, "num_chars": 7}, {"sum_logits": -5.716101169586182, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.807674407958984, "logits_per_token": -5.716101169586182, "logits_per_char": -1.1432202339172364, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": "1835", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.019117832183838, "incorrect_loss_raw": 6.046499888102214, "correct_loss_per_char": 1.0027311188834054, "incorrect_loss_per_char": 0.6118677558091582, "correct_loss_per_token": 3.509558916091919, "incorrect_loss_per_token": 6.046499888102214, "correct_loss_uncond": -8.523587703704834, "incorrect_loss_uncond": -6.839956283569336}, "model_output": [{"sum_logits": -4.5348334312438965, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.858726501464844, "logits_per_token": -4.5348334312438965, "logits_per_char": -0.6478333473205566, "num_chars": 7}, {"sum_logits": -7.019117832183838, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.542705535888672, "logits_per_token": -3.509558916091919, "logits_per_char": -1.0027311188834054, "num_chars": 7}, {"sum_logits": -8.161263465881348, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.254865646362305, "logits_per_token": -8.161263465881348, "logits_per_char": -0.5829473904200962, "num_chars": 14}, {"sum_logits": -5.4434027671813965, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -13.5457763671875, "logits_per_token": -5.4434027671813965, "logits_per_char": -0.6048225296868218, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": "9-149", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.489509105682373, "incorrect_loss_raw": 10.22707430521647, "correct_loss_per_char": 0.8979018211364747, "incorrect_loss_per_char": 1.8628662472679502, "correct_loss_per_token": 4.489509105682373, "incorrect_loss_per_token": 10.22707430521647, "correct_loss_uncond": -8.348811626434326, "incorrect_loss_uncond": -3.0538148880004883}, "model_output": [{"sum_logits": -9.583802223205566, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.267616271972656, "logits_per_token": -9.583802223205566, "logits_per_char": -1.369114603315081, "num_chars": 7}, {"sum_logits": -12.82076358795166, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.944449424743652, "logits_per_token": -12.82076358795166, "logits_per_char": -2.564152717590332, "num_chars": 5}, {"sum_logits": -4.489509105682373, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.8383207321167, "logits_per_token": -4.489509105682373, "logits_per_char": -0.8979018211364747, "num_chars": 5}, {"sum_logits": -8.276657104492188, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.63060188293457, "logits_per_token": -8.276657104492188, "logits_per_char": -1.6553314208984375, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": "695", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.054527282714844, "incorrect_loss_raw": 15.2892697652181, "correct_loss_per_char": 1.0032074872185202, "incorrect_loss_per_char": 0.9926469299528334, "correct_loss_per_token": 4.263631820678711, "incorrect_loss_per_token": 3.822317441304525, "correct_loss_uncond": -8.152530670166016, "incorrect_loss_uncond": -8.022186279296875}, "model_output": [{"sum_logits": -17.054527282714844, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -25.20705795288086, "logits_per_token": -4.263631820678711, "logits_per_char": -1.0032074872185202, "num_chars": 17}, {"sum_logits": -19.17915916442871, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -26.25333023071289, "logits_per_token": -4.794789791107178, "logits_per_char": -1.1986974477767944, "num_chars": 16}, {"sum_logits": -14.511451721191406, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.97997283935547, "logits_per_token": -3.6278629302978516, "logits_per_char": -0.9674301147460938, "num_chars": 15}, {"sum_logits": -12.17719841003418, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.701065063476562, "logits_per_token": -3.044299602508545, "logits_per_char": -0.811813227335612, "num_chars": 15}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": "8-179", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.116862297058105, "incorrect_loss_raw": 8.050187746683756, "correct_loss_per_char": 1.2833511179143733, "incorrect_loss_per_char": 1.0357802362038344, "correct_loss_per_token": 3.5292155742645264, "incorrect_loss_per_token": 8.050187746683756, "correct_loss_uncond": -5.126439094543457, "incorrect_loss_uncond": -6.15117867787679}, "model_output": [{"sum_logits": -9.94141960144043, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.312512397766113, "logits_per_token": -9.94141960144043, "logits_per_char": -1.4202028002057756, "num_chars": 7}, {"sum_logits": -7.800780296325684, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.698051452636719, "logits_per_token": -7.800780296325684, "logits_per_char": -0.9750975370407104, "num_chars": 8}, {"sum_logits": -6.408363342285156, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.593535423278809, "logits_per_token": -6.408363342285156, "logits_per_char": -0.7120403713650174, "num_chars": 9}, {"sum_logits": -14.116862297058105, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.243301391601562, "logits_per_token": -3.5292155742645264, "logits_per_char": -1.2833511179143733, "num_chars": 11}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": "7-50", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.786426544189453, "incorrect_loss_raw": 17.249632835388184, "correct_loss_per_char": 0.39963314984295817, "incorrect_loss_per_char": 0.5034882006191072, "correct_loss_per_token": 2.4644044240315757, "incorrect_loss_per_token": 3.0881563716464573, "correct_loss_uncond": -22.77511978149414, "incorrect_loss_uncond": -18.20858923594157}, "model_output": [{"sum_logits": -14.786426544189453, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -37.561546325683594, "logits_per_token": -2.4644044240315757, "logits_per_char": -0.39963314984295817, "num_chars": 37}, {"sum_logits": -24.539783477783203, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -45.42766189575195, "logits_per_token": -4.089963912963867, "logits_per_char": -0.6816606521606445, "num_chars": 36}, {"sum_logits": -8.01953411102295, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -29.823183059692383, "logits_per_token": -1.336589018503825, "logits_per_char": -0.2291295460292271, "num_chars": 35}, {"sum_logits": -19.1895809173584, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.123821258544922, "logits_per_token": -3.8379161834716795, "logits_per_char": -0.59967440366745, "num_chars": 32}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": "508", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 18.748592376708984, "incorrect_loss_raw": 18.017905235290527, "correct_loss_per_char": 1.7044174887917258, "incorrect_loss_per_char": 1.0950454331579662, "correct_loss_per_token": 6.249530792236328, "incorrect_loss_per_token": 7.859304057227241, "correct_loss_uncond": -3.81463623046875, "incorrect_loss_uncond": -1.7122780481974285}, "model_output": [{"sum_logits": -20.693674087524414, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -22.708560943603516, "logits_per_token": -6.897891362508138, "logits_per_char": -1.0346837043762207, "num_chars": 20}, {"sum_logits": -14.829642295837402, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.851303100585938, "logits_per_token": -7.414821147918701, "logits_per_char": -0.9268526434898376, "num_chars": 16}, {"sum_logits": -18.530399322509766, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -18.630685806274414, "logits_per_token": -9.265199661254883, "logits_per_char": -1.3235999516078405, "num_chars": 14}, {"sum_logits": -18.748592376708984, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -22.563228607177734, "logits_per_token": -6.249530792236328, "logits_per_char": -1.7044174887917258, "num_chars": 11}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": "1674", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.191915512084961, "incorrect_loss_raw": 8.781911055246988, "correct_loss_per_char": 0.7469429216886821, "incorrect_loss_per_char": 0.8951452081165616, "correct_loss_per_token": 2.838383102416992, "incorrect_loss_per_token": 5.027572472890218, "correct_loss_uncond": -10.199052810668945, "incorrect_loss_uncond": -7.968225320180257}, "model_output": [{"sum_logits": -10.813911437988281, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.80889892578125, "logits_per_token": -3.6046371459960938, "logits_per_char": -0.7724222455705915, "num_chars": 14}, {"sum_logits": -7.4243388175964355, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.677475929260254, "logits_per_token": -7.4243388175964355, "logits_per_char": -1.2373898029327393, "num_chars": 6}, {"sum_logits": -14.191915512084961, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -24.390968322753906, "logits_per_token": -2.838383102416992, "logits_per_char": -0.7469429216886821, "num_chars": 19}, {"sum_logits": -8.10748291015625, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.764034271240234, "logits_per_token": -4.053741455078125, "logits_per_char": -0.6756235758463541, "num_chars": 12}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": "163", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.25837230682373, "incorrect_loss_raw": 8.446534156799316, "correct_loss_per_char": 0.6034336651072782, "incorrect_loss_per_char": 0.910563313771808, "correct_loss_per_token": 5.129186153411865, "incorrect_loss_per_token": 4.971840699513753, "correct_loss_uncond": -10.023690223693848, "incorrect_loss_uncond": -8.702864646911621}, "model_output": [{"sum_logits": -10.25837230682373, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -20.282062530517578, "logits_per_token": -5.129186153411865, "logits_per_char": -0.6034336651072782, "num_chars": 17}, {"sum_logits": -10.795446395874023, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.302349090576172, "logits_per_token": -5.397723197937012, "logits_per_char": -0.7711033139910016, "num_chars": 14}, {"sum_logits": -4.49144172668457, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.813919067382812, "logits_per_token": -4.49144172668457, "logits_per_char": -1.1228604316711426, "num_chars": 4}, {"sum_logits": -10.052714347839355, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.331928253173828, "logits_per_token": -5.026357173919678, "logits_per_char": -0.8377261956532797, "num_chars": 12}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": "7-49", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.017491340637207, "incorrect_loss_raw": 8.412738800048828, "correct_loss_per_char": 1.8034982681274414, "incorrect_loss_per_char": 1.213341236114502, "correct_loss_per_token": 9.017491340637207, "incorrect_loss_per_token": 6.368804719712998, "correct_loss_uncond": -6.023556709289551, "incorrect_loss_uncond": -5.44346809387207}, "model_output": [{"sum_logits": -6.4699554443359375, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -13.276829719543457, "logits_per_token": -6.4699554443359375, "logits_per_char": -1.2939910888671875, "num_chars": 5}, {"sum_logits": -9.570557594299316, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -12.821043968200684, "logits_per_token": -9.570557594299316, "logits_per_char": -1.1963196992874146, "num_chars": 8}, {"sum_logits": -9.017491340637207, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -15.041048049926758, "logits_per_token": -9.017491340637207, "logits_per_char": -1.8034982681274414, "num_chars": 5}, {"sum_logits": -9.19770336151123, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.470746994018555, "logits_per_token": -3.0659011205037436, "logits_per_char": -1.1497129201889038, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": "8-393", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.909954071044922, "incorrect_loss_raw": 18.709760030110676, "correct_loss_per_char": 0.4449987913432874, "incorrect_loss_per_char": 0.6777336483909969, "correct_loss_per_token": 3.3819908142089843, "incorrect_loss_per_token": 3.1061502638317293, "correct_loss_uncond": -16.680614471435547, "incorrect_loss_uncond": -15.396607081095377}, "model_output": [{"sum_logits": -16.400421142578125, "num_tokens": 7, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -35.21554183959961, "logits_per_token": -2.3429173060825894, "logits_per_char": -0.5857293265206474, "num_chars": 28}, {"sum_logits": -16.075408935546875, "num_tokens": 4, "num_tokens_all": 171, "is_greedy": false, "sum_logits_uncond": -28.003416061401367, "logits_per_token": -4.018852233886719, "logits_per_char": -0.7307004061612216, "num_chars": 22}, {"sum_logits": -23.65345001220703, "num_tokens": 8, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -39.10014343261719, "logits_per_token": -2.956681251525879, "logits_per_char": -0.7167712124911222, "num_chars": 33}, {"sum_logits": -16.909954071044922, "num_tokens": 5, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -33.59056854248047, "logits_per_token": -3.3819908142089843, "logits_per_char": -0.4449987913432874, "num_chars": 38}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": "788", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.57086181640625, "incorrect_loss_raw": 10.121868451436361, "correct_loss_per_char": 1.8927154541015625, "incorrect_loss_per_char": 2.5304671128590903, "correct_loss_per_token": 3.785430908203125, "incorrect_loss_per_token": 5.060934225718181, "correct_loss_uncond": -6.444002151489258, "incorrect_loss_uncond": -5.303422609965007}, "model_output": [{"sum_logits": -9.267698287963867, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.458850860595703, "logits_per_token": -4.633849143981934, "logits_per_char": -2.316924571990967, "num_chars": 4}, {"sum_logits": -7.57086181640625, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.014863967895508, "logits_per_token": -3.785430908203125, "logits_per_char": -1.8927154541015625, "num_chars": 4}, {"sum_logits": -10.6183500289917, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.71992301940918, "logits_per_token": -5.30917501449585, "logits_per_char": -2.654587507247925, "num_chars": 4}, {"sum_logits": -10.479557037353516, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.097099304199219, "logits_per_token": -5.239778518676758, "logits_per_char": -2.619889259338379, "num_chars": 4}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": "9-29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 32.14459228515625, "incorrect_loss_raw": 38.167676289876304, "correct_loss_per_char": 0.4797700341068097, "incorrect_loss_per_char": 0.6522558721457037, "correct_loss_per_token": 2.472660945012019, "incorrect_loss_per_token": 3.3867627409295573, "correct_loss_uncond": -16.149051666259766, "incorrect_loss_uncond": -5.299619674682617}, "model_output": [{"sum_logits": -32.14459228515625, "num_tokens": 13, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -48.293643951416016, "logits_per_token": -2.472660945012019, "logits_per_char": -0.4797700341068097, "num_chars": 67}, {"sum_logits": -44.734710693359375, "num_tokens": 17, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -58.474483489990234, "logits_per_token": -2.6314535701976105, "logits_per_char": -0.5325560796828497, "num_chars": 84}, {"sum_logits": -46.93280792236328, "num_tokens": 11, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -46.90849685668945, "logits_per_token": -4.266618902033025, "logits_per_char": -0.8533237804066051, "num_chars": 55}, {"sum_logits": -22.83551025390625, "num_tokens": 7, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -25.01890754699707, "logits_per_token": -3.2622157505580356, "logits_per_char": -0.5708877563476562, "num_chars": 40}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": "9-368", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.127935409545898, "incorrect_loss_raw": 7.068505525588989, "correct_loss_per_char": 0.41279354095458987, "incorrect_loss_per_char": 0.9127084016799927, "correct_loss_per_token": 4.127935409545898, "incorrect_loss_per_token": 5.908955335617065, "correct_loss_uncond": -11.525105476379395, "incorrect_loss_uncond": -6.63231348991394}, "model_output": [{"sum_logits": -3.4849088191986084, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.61186408996582, "logits_per_token": -3.4849088191986084, "logits_per_char": -0.6969817638397217, "num_chars": 5}, {"sum_logits": -6.957301139831543, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.713788986206055, "logits_per_token": -3.4786505699157715, "logits_per_char": -0.6957301139831543, "num_chars": 10}, {"sum_logits": -4.127935409545898, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -15.653040885925293, "logits_per_token": -4.127935409545898, "logits_per_char": -0.41279354095458987, "num_chars": 10}, {"sum_logits": -10.763306617736816, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.776803970336914, "logits_per_token": -10.763306617736816, "logits_per_char": -1.345413327217102, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": "7-671", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.46187973022461, "incorrect_loss_raw": 14.362203598022461, "correct_loss_per_char": 0.36790781435759173, "incorrect_loss_per_char": 0.7004433740285587, "correct_loss_per_token": 2.1154699325561523, "incorrect_loss_per_token": 3.287589295705159, "correct_loss_uncond": -18.22023582458496, "incorrect_loss_uncond": -13.985263188680014}, "model_output": [{"sum_logits": -14.077372550964355, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -27.614585876464844, "logits_per_token": -3.519343137741089, "logits_per_char": -0.7409143447875977, "num_chars": 19}, {"sum_logits": -18.177696228027344, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -30.962684631347656, "logits_per_token": -3.635539245605469, "logits_per_char": -0.7903346186098845, "num_chars": 23}, {"sum_logits": -8.46187973022461, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -26.68211555480957, "logits_per_token": -2.1154699325561523, "logits_per_char": -0.36790781435759173, "num_chars": 23}, {"sum_logits": -10.831542015075684, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -26.465129852294922, "logits_per_token": -2.707885503768921, "logits_per_char": -0.5700811586881939, "num_chars": 19}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": "1272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 36.467437744140625, "incorrect_loss_raw": 27.97188440958659, "correct_loss_per_char": 1.1763689594884073, "incorrect_loss_per_char": 1.0810902359139207, "correct_loss_per_token": 6.0779062906901045, "incorrect_loss_per_token": 5.475599119398329, "correct_loss_uncond": -0.4398994445800781, "incorrect_loss_uncond": -8.926914850870768}, "model_output": [{"sum_logits": -36.467437744140625, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.9073371887207, "logits_per_token": -6.0779062906901045, "logits_per_char": -1.1763689594884073, "num_chars": 31}, {"sum_logits": -18.347183227539062, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -29.30255699157715, "logits_per_token": -4.586795806884766, "logits_per_char": -0.9173591613769532, "num_chars": 20}, {"sum_logits": -38.21077346801758, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -45.07155227661133, "logits_per_token": -6.368462244669597, "logits_per_char": -1.2736924489339192, "num_chars": 30}, {"sum_logits": -27.357696533203125, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -36.322288513183594, "logits_per_token": -5.471539306640625, "logits_per_char": -1.0522190974308894, "num_chars": 26}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": "648", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.797403335571289, "incorrect_loss_raw": 13.100343704223633, "correct_loss_per_char": 0.752257301693871, "incorrect_loss_per_char": 0.5580820655125187, "correct_loss_per_token": 3.9493508338928223, "incorrect_loss_per_token": 4.366781234741211, "correct_loss_uncond": -12.48580551147461, "incorrect_loss_uncond": -8.57005500793457}, "model_output": [{"sum_logits": -15.776201248168945, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -24.2926025390625, "logits_per_token": -5.258733749389648, "logits_per_char": -0.5440069395920326, "num_chars": 29}, {"sum_logits": -15.797403335571289, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -28.2832088470459, "logits_per_token": -3.9493508338928223, "logits_per_char": -0.752257301693871, "num_chars": 21}, {"sum_logits": -10.120491981506348, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -22.778644561767578, "logits_per_token": -3.3734973271687827, "logits_per_char": -0.4600223627957431, "num_chars": 22}, {"sum_logits": -13.404337882995605, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.93994903564453, "logits_per_token": -4.468112627665202, "logits_per_char": -0.6702168941497803, "num_chars": 20}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": "9-1180", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.600296497344971, "incorrect_loss_raw": 12.369792302449545, "correct_loss_per_char": 0.7667160828908285, "incorrect_loss_per_char": 1.3095739132318742, "correct_loss_per_token": 4.600296497344971, "incorrect_loss_per_token": 8.0246950785319, "correct_loss_uncond": -7.855026721954346, "incorrect_loss_uncond": -5.367151260375977}, "model_output": [{"sum_logits": -11.038793563842773, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -14.090011596679688, "logits_per_token": -11.038793563842773, "logits_per_char": -1.839798927307129, "num_chars": 6}, {"sum_logits": -4.600296497344971, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -12.455323219299316, "logits_per_token": -4.600296497344971, "logits_per_char": -0.7667160828908285, "num_chars": 6}, {"sum_logits": -13.045624732971191, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -20.3870906829834, "logits_per_token": -6.522812366485596, "logits_per_char": -1.0035095948439379, "num_chars": 13}, {"sum_logits": -13.024958610534668, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -18.733728408813477, "logits_per_token": -6.512479305267334, "logits_per_char": -1.0854132175445557, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": "9-227", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.935951232910156, "incorrect_loss_raw": 9.4195237159729, "correct_loss_per_char": 1.175995566628196, "incorrect_loss_per_char": 1.6308238839346265, "correct_loss_per_token": 6.467975616455078, "incorrect_loss_per_token": 9.4195237159729, "correct_loss_uncond": -7.419132232666016, "incorrect_loss_uncond": -2.2539219856262207}, "model_output": [{"sum_logits": -13.654504776000977, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.99838638305664, "logits_per_token": -13.654504776000977, "logits_per_char": -2.7309009552001955, "num_chars": 5}, {"sum_logits": -3.1615710258483887, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -3.1615710258483887, "logits_per_char": -0.5269285043080648, "num_chars": 6}, {"sum_logits": -12.935951232910156, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -20.355083465576172, "logits_per_token": -6.467975616455078, "logits_per_char": -1.175995566628196, "num_chars": 11}, {"sum_logits": -11.442495346069336, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.13295841217041, "logits_per_token": -11.442495346069336, "logits_per_char": -1.6346421922956194, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": "1582", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.858644485473633, "incorrect_loss_raw": 10.296673138936361, "correct_loss_per_char": 0.8053313168612394, "incorrect_loss_per_char": 0.9647829320695664, "correct_loss_per_token": 4.429322242736816, "incorrect_loss_per_token": 5.148336569468181, "correct_loss_uncond": -9.749435424804688, "incorrect_loss_uncond": -8.969964663187662}, "model_output": [{"sum_logits": -8.858644485473633, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -18.60807991027832, "logits_per_token": -4.429322242736816, "logits_per_char": -0.8053313168612394, "num_chars": 11}, {"sum_logits": -11.67918872833252, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -20.709646224975586, "logits_per_token": -5.83959436416626, "logits_per_char": -0.9732657273610433, "num_chars": 12}, {"sum_logits": -8.468324661254883, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -17.073888778686523, "logits_per_token": -4.234162330627441, "logits_per_char": -0.8468324661254882, "num_chars": 10}, {"sum_logits": -10.74250602722168, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -20.01637840270996, "logits_per_token": -5.37125301361084, "logits_per_char": -1.0742506027221679, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": "8-125", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.029870986938477, "incorrect_loss_raw": 14.282329241434732, "correct_loss_per_char": 0.7164193562098912, "incorrect_loss_per_char": 0.8588297314114041, "correct_loss_per_token": 3.343290328979492, "incorrect_loss_per_token": 4.760776413811578, "correct_loss_uncond": -14.299829483032227, "incorrect_loss_uncond": -10.055558204650879}, "model_output": [{"sum_logits": -10.029870986938477, "num_tokens": 3, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -24.329700469970703, "logits_per_token": -3.343290328979492, "logits_per_char": -0.7164193562098912, "num_chars": 14}, {"sum_logits": -16.798599243164062, "num_tokens": 3, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -25.99802589416504, "logits_per_token": -5.5995330810546875, "logits_per_char": -0.8399299621582031, "num_chars": 20}, {"sum_logits": -10.722004890441895, "num_tokens": 3, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -22.29334259033203, "logits_per_token": -3.5740016301472983, "logits_per_char": -0.7148003260294596, "num_chars": 15}, {"sum_logits": -15.326383590698242, "num_tokens": 3, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -24.722293853759766, "logits_per_token": -5.108794530232747, "logits_per_char": -1.0217589060465495, "num_chars": 15}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": "1923", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.168135643005371, "incorrect_loss_raw": 17.392874717712402, "correct_loss_per_char": 0.6946892738342285, "incorrect_loss_per_char": 1.165597436734534, "correct_loss_per_token": 4.168135643005371, "incorrect_loss_per_token": 8.696437358856201, "correct_loss_uncond": -9.425119400024414, "incorrect_loss_uncond": -3.084890683492025}, "model_output": [{"sum_logits": -4.168135643005371, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.593255043029785, "logits_per_token": -4.168135643005371, "logits_per_char": -0.6946892738342285, "num_chars": 6}, {"sum_logits": -10.925528526306152, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -17.468643188476562, "logits_per_token": -5.462764263153076, "logits_per_char": -0.6426781486062443, "num_chars": 17}, {"sum_logits": -21.01117706298828, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -22.686946868896484, "logits_per_token": -10.50558853149414, "logits_per_char": -1.16728761461046, "num_chars": 18}, {"sum_logits": -20.241918563842773, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -21.277706146240234, "logits_per_token": -10.120959281921387, "logits_per_char": -1.6868265469868977, "num_chars": 12}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": "9-229", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 29.61822509765625, "incorrect_loss_raw": 31.655453364054363, "correct_loss_per_char": 0.987274169921875, "incorrect_loss_per_char": 1.5827726682027183, "correct_loss_per_token": 5.92364501953125, "incorrect_loss_per_token": 7.369510237375895, "correct_loss_uncond": -8.1978759765625, "incorrect_loss_uncond": -1.4081617991129558}, "model_output": [{"sum_logits": -32.66118621826172, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -41.227760314941406, "logits_per_token": -6.532237243652344, "logits_per_char": -1.633059310913086, "num_chars": 20}, {"sum_logits": -36.79602813720703, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -35.025978088378906, "logits_per_token": -9.199007034301758, "logits_per_char": -1.8398014068603517, "num_chars": 20}, {"sum_logits": -25.509145736694336, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -22.93710708618164, "logits_per_token": -6.377286434173584, "logits_per_char": -1.2754572868347167, "num_chars": 20}, {"sum_logits": -29.61822509765625, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -37.81610107421875, "logits_per_token": -5.92364501953125, "logits_per_char": -0.987274169921875, "num_chars": 30}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": "1702", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.280275344848633, "incorrect_loss_raw": 21.167513529459637, "correct_loss_per_char": 0.5712586045265198, "incorrect_loss_per_char": 0.837096097956541, "correct_loss_per_token": 2.6114679064069475, "incorrect_loss_per_token": 4.086467954847548, "correct_loss_uncond": -11.03042984008789, "incorrect_loss_uncond": -8.147952397664389}, "model_output": [{"sum_logits": -19.554546356201172, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -27.321582794189453, "logits_per_token": -3.9109092712402345, "logits_per_char": -0.8888430161909624, "num_chars": 22}, {"sum_logits": -18.280275344848633, "num_tokens": 7, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -29.310705184936523, "logits_per_token": -2.6114679064069475, "logits_per_char": -0.5712586045265198, "num_chars": 32}, {"sum_logits": -13.23312759399414, "num_tokens": 6, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -25.523977279663086, "logits_per_token": -2.20552126566569, "logits_per_char": -0.441104253133138, "num_chars": 30}, {"sum_logits": -30.714866638183594, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -35.10083770751953, "logits_per_token": -6.142973327636719, "logits_per_char": -1.181341024545523, "num_chars": 26}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": "8-260", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 32.90617752075195, "incorrect_loss_raw": 34.286481857299805, "correct_loss_per_char": 0.8659520400197882, "incorrect_loss_per_char": 0.9347398049134611, "correct_loss_per_token": 5.484362920125325, "incorrect_loss_per_token": 5.534166844685873, "correct_loss_uncond": -7.527194976806641, "incorrect_loss_uncond": -8.134689966837565}, "model_output": [{"sum_logits": -24.52623176574707, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -36.80254364013672, "logits_per_token": -4.905246353149414, "logits_per_char": -0.7213597578160903, "num_chars": 34}, {"sum_logits": -45.734458923339844, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -46.564353942871094, "logits_per_token": -7.622409820556641, "logits_per_char": -1.3066988263811383, "num_chars": 35}, {"sum_logits": -32.90617752075195, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -40.433372497558594, "logits_per_token": -5.484362920125325, "logits_per_char": -0.8659520400197882, "num_chars": 38}, {"sum_logits": -32.5987548828125, "num_tokens": 8, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -43.8966178894043, "logits_per_token": -4.0748443603515625, "logits_per_char": -0.7761608305431548, "num_chars": 42}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": "9-491", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.416831970214844, "incorrect_loss_raw": 11.77909247080485, "correct_loss_per_char": 1.1771039962768555, "incorrect_loss_per_char": 1.4862295002132269, "correct_loss_per_token": 4.708415985107422, "incorrect_loss_per_token": 7.827519416809082, "correct_loss_uncond": -5.454694747924805, "incorrect_loss_uncond": -3.5258169174194336}, "model_output": [{"sum_logits": -11.627839088439941, "num_tokens": 1, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -14.70566177368164, "logits_per_token": -11.627839088439941, "logits_per_char": -1.6611198697771346, "num_chars": 7}, {"sum_logits": -11.347759246826172, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -14.634969711303711, "logits_per_token": -5.673879623413086, "logits_per_char": -1.0316144769841975, "num_chars": 11}, {"sum_logits": -9.416831970214844, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -14.871526718139648, "logits_per_token": -4.708415985107422, "logits_per_char": -1.1771039962768555, "num_chars": 8}, {"sum_logits": -12.361679077148438, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -16.5740966796875, "logits_per_token": -6.180839538574219, "logits_per_char": -1.7659541538783483, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": "75", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.392813205718994, "incorrect_loss_raw": 6.343722820281982, "correct_loss_per_char": 0.42410165071487427, "incorrect_loss_per_char": 0.8938944339752197, "correct_loss_per_token": 3.392813205718994, "incorrect_loss_per_token": 6.343722820281982, "correct_loss_uncond": -11.262213230133057, "incorrect_loss_uncond": -6.223766485850017}, "model_output": [{"sum_logits": -3.392813205718994, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -14.65502643585205, "logits_per_token": -3.392813205718994, "logits_per_char": -0.42410165071487427, "num_chars": 8}, {"sum_logits": -8.621397018432617, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.74663257598877, "logits_per_token": -8.621397018432617, "logits_per_char": -1.0776746273040771, "num_chars": 8}, {"sum_logits": -5.500035762786865, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.754987716674805, "logits_per_token": -5.500035762786865, "logits_per_char": -0.7857193946838379, "num_chars": 7}, {"sum_logits": -4.909735679626465, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.200847625732422, "logits_per_token": -4.909735679626465, "logits_per_char": -0.8182892799377441, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": "1215", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 37.6072998046875, "incorrect_loss_raw": 16.57887585957845, "correct_loss_per_char": 0.5969412667410714, "incorrect_loss_per_char": 0.6474351042930526, "correct_loss_per_token": 3.133941650390625, "incorrect_loss_per_token": 3.4318306393093536, "correct_loss_uncond": -19.464813232421875, "incorrect_loss_uncond": -10.226067860921225}, "model_output": [{"sum_logits": -13.108341217041016, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -26.09307098388672, "logits_per_token": -2.621668243408203, "logits_per_char": -0.5958336916836825, "num_chars": 22}, {"sum_logits": -18.82931137084961, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -29.356050491333008, "logits_per_token": -4.707327842712402, "logits_per_char": -0.7531724548339844, "num_chars": 25}, {"sum_logits": -37.6072998046875, "num_tokens": 12, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -57.072113037109375, "logits_per_token": -3.133941650390625, "logits_per_char": -0.5969412667410714, "num_chars": 63}, {"sum_logits": -17.798974990844727, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -24.965709686279297, "logits_per_token": -2.9664958318074546, "logits_per_char": -0.5932991663614909, "num_chars": 30}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": "8-93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.2761287689209, "incorrect_loss_raw": 20.975512822469074, "correct_loss_per_char": 1.0131489889962333, "incorrect_loss_per_char": 1.0449034736269998, "correct_loss_per_token": 4.255225753784179, "incorrect_loss_per_token": 4.541219838460287, "correct_loss_uncond": -0.4879131317138672, "incorrect_loss_uncond": -0.07528495788574219}, "model_output": [{"sum_logits": -17.839548110961914, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.584144592285156, "logits_per_token": -3.5679096221923827, "logits_per_char": -0.8495022909981864, "num_chars": 21}, {"sum_logits": -20.76703643798828, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.701309204101562, "logits_per_token": -5.19175910949707, "logits_per_char": -1.3844690958658854, "num_chars": 15}, {"sum_logits": -21.2761287689209, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -21.764041900634766, "logits_per_token": -4.255225753784179, "logits_per_char": -1.0131489889962333, "num_chars": 21}, {"sum_logits": -24.31995391845703, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -22.866939544677734, "logits_per_token": -4.863990783691406, "logits_per_char": -0.9007390340169271, "num_chars": 27}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": "7-988", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 19.326332092285156, "incorrect_loss_raw": 21.61922836303711, "correct_loss_per_char": 0.568421532126034, "incorrect_loss_per_char": 0.7961189268192568, "correct_loss_per_token": 2.4157915115356445, "incorrect_loss_per_token": 4.084640142652723, "correct_loss_uncond": -11.264595031738281, "incorrect_loss_uncond": -9.924059549967447}, "model_output": [{"sum_logits": -19.326332092285156, "num_tokens": 8, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -30.590927124023438, "logits_per_token": -2.4157915115356445, "logits_per_char": -0.568421532126034, "num_chars": 34}, {"sum_logits": -24.58693504333496, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -34.24799346923828, "logits_per_token": -4.917387008666992, "logits_per_char": -1.0244556268056233, "num_chars": 24}, {"sum_logits": -18.742252349853516, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -29.052499771118164, "logits_per_token": -3.7484504699707033, "logits_per_char": -0.6462845637880522, "num_chars": 29}, {"sum_logits": -21.52849769592285, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -31.329370498657227, "logits_per_token": -3.588082949320475, "logits_per_char": -0.717616589864095, "num_chars": 30}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": "9-1139", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.377752304077148, "incorrect_loss_raw": 7.326128959655762, "correct_loss_per_char": 0.5816414045250934, "incorrect_loss_per_char": 1.0312801290441442, "correct_loss_per_token": 6.688876152038574, "incorrect_loss_per_token": 5.618456681569417, "correct_loss_uncond": -4.507524490356445, "incorrect_loss_uncond": -5.432017962137858}, "model_output": [{"sum_logits": -10.246033668518066, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -16.18107032775879, "logits_per_token": -5.123016834259033, "logits_per_char": -1.1384481853908963, "num_chars": 9}, {"sum_logits": -4.802304267883301, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.802304267883301, "logits_per_char": -0.8003840446472168, "num_chars": 6}, {"sum_logits": -13.377752304077148, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -17.885276794433594, "logits_per_token": -6.688876152038574, "logits_per_char": -0.5816414045250934, "num_chars": 23}, {"sum_logits": -6.930048942565918, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.204378128051758, "logits_per_token": -6.930048942565918, "logits_per_char": -1.1550081570943196, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": "1545", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.736599922180176, "incorrect_loss_raw": 8.188749631245932, "correct_loss_per_char": 1.4560999870300293, "incorrect_loss_per_char": 1.170720186687651, "correct_loss_per_token": 4.368299961090088, "incorrect_loss_per_token": 6.681556065877278, "correct_loss_uncond": -4.781083106994629, "incorrect_loss_uncond": -2.2993831634521484}, "model_output": [{"sum_logits": -9.043161392211914, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -10.344058990478516, "logits_per_token": -4.521580696105957, "logits_per_char": -1.2918801988874162, "num_chars": 7}, {"sum_logits": -8.736599922180176, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.517683029174805, "logits_per_token": -4.368299961090088, "logits_per_char": -1.4560999870300293, "num_chars": 6}, {"sum_logits": -6.679716110229492, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.46847915649414, "logits_per_token": -6.679716110229492, "logits_per_char": -1.3359432220458984, "num_chars": 5}, {"sum_logits": -8.843371391296387, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.651860237121582, "logits_per_token": -8.843371391296387, "logits_per_char": -0.8843371391296386, "num_chars": 10}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": "7-664", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.935731887817383, "incorrect_loss_raw": 15.972832043965658, "correct_loss_per_char": 0.43742927551269534, "incorrect_loss_per_char": 0.6585341947855966, "correct_loss_per_token": 2.1871463775634767, "incorrect_loss_per_token": 2.997105238172743, "correct_loss_uncond": -16.734167098999023, "incorrect_loss_uncond": -13.056090354919434}, "model_output": [{"sum_logits": -15.068647384643555, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -26.700389862060547, "logits_per_token": -3.013729476928711, "logits_per_char": -0.538165978022984, "num_chars": 28}, {"sum_logits": -17.77150535583496, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -31.889205932617188, "logits_per_token": -2.9619175593058267, "logits_per_char": -0.6835194367628831, "num_chars": 26}, {"sum_logits": -10.935731887817383, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -27.669898986816406, "logits_per_token": -2.1871463775634767, "logits_per_char": -0.43742927551269534, "num_chars": 25}, {"sum_logits": -15.078343391418457, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.49717140197754, "logits_per_token": -3.0156686782836912, "logits_per_char": -0.7539171695709228, "num_chars": 20}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": "8-53", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.914631843566895, "incorrect_loss_raw": 20.835453033447266, "correct_loss_per_char": 0.8841462135314941, "incorrect_loss_per_char": 1.4667708816982452, "correct_loss_per_token": 5.304877281188965, "incorrect_loss_per_token": 6.945151011149089, "correct_loss_uncond": -16.209834098815918, "incorrect_loss_uncond": -7.145374298095703}, "model_output": [{"sum_logits": -20.64179039001465, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -30.651966094970703, "logits_per_token": -6.88059679667155, "logits_per_char": -1.2901118993759155, "num_chars": 16}, {"sum_logits": -20.047426223754883, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -27.08297348022461, "logits_per_token": -6.682475407918294, "logits_per_char": -1.4319590159824915, "num_chars": 14}, {"sum_logits": -15.914631843566895, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -32.12446594238281, "logits_per_token": -5.304877281188965, "logits_per_char": -0.8841462135314941, "num_chars": 18}, {"sum_logits": -21.817142486572266, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -26.207542419433594, "logits_per_token": -7.272380828857422, "logits_per_char": -1.6782417297363281, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": "7-1044", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 18.381336212158203, "incorrect_loss_raw": 13.03558349609375, "correct_loss_per_char": 0.6127112070719402, "incorrect_loss_per_char": 0.6577842424786279, "correct_loss_per_token": 3.6762672424316407, "incorrect_loss_per_token": 3.581614520814684, "correct_loss_uncond": -12.833168029785156, "incorrect_loss_uncond": -7.488732655843099}, "model_output": [{"sum_logits": -18.381336212158203, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -31.21450424194336, "logits_per_token": -3.6762672424316407, "logits_per_char": -0.6127112070719402, "num_chars": 30}, {"sum_logits": -8.041155815124512, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -19.81112289428711, "logits_per_token": -2.010288953781128, "logits_per_char": -0.4232187271118164, "num_chars": 19}, {"sum_logits": -19.447723388671875, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -28.993518829345703, "logits_per_token": -4.861930847167969, "logits_per_char": -0.7202860514322916, "num_chars": 27}, {"sum_logits": -11.617871284484863, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.8726237614949546, "logits_per_char": -0.8298479488917759, "num_chars": 14}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": "7-1122", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.467845916748047, "incorrect_loss_raw": 12.8146120707194, "correct_loss_per_char": 0.6926581064860026, "incorrect_loss_per_char": 1.0661810672644412, "correct_loss_per_token": 4.155948638916016, "incorrect_loss_per_token": 4.962379455566406, "correct_loss_uncond": -6.923887252807617, "incorrect_loss_uncond": -5.217480977376302}, "model_output": [{"sum_logits": -12.467845916748047, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.391733169555664, "logits_per_token": -4.155948638916016, "logits_per_char": -0.6926581064860026, "num_chars": 18}, {"sum_logits": -12.435157775878906, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.648187637329102, "logits_per_token": -6.217578887939453, "logits_per_char": -1.2435157775878907, "num_chars": 10}, {"sum_logits": -16.887662887573242, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.272300720214844, "logits_per_token": -5.629220962524414, "logits_per_char": -1.1258441925048828, "num_chars": 15}, {"sum_logits": -9.121015548706055, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -17.175790786743164, "logits_per_token": -3.0403385162353516, "logits_per_char": -0.8291832317005504, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": "9-79", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.789230346679688, "incorrect_loss_raw": 13.929632822672525, "correct_loss_per_char": 0.5125752324643342, "incorrect_loss_per_char": 0.561869574218392, "correct_loss_per_token": 2.3578460693359373, "incorrect_loss_per_token": 3.0135502179463707, "correct_loss_uncond": -17.444007873535156, "incorrect_loss_uncond": -7.595170338948567}, "model_output": [{"sum_logits": -14.634849548339844, "num_tokens": 5, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -21.21835708618164, "logits_per_token": -2.9269699096679687, "logits_per_char": -0.6097853978474935, "num_chars": 24}, {"sum_logits": -11.789230346679688, "num_tokens": 5, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -29.233238220214844, "logits_per_token": -2.3578460693359373, "logits_per_char": -0.5125752324643342, "num_chars": 23}, {"sum_logits": -13.49662971496582, "num_tokens": 5, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -22.783790588378906, "logits_per_token": -2.699325942993164, "logits_per_char": -0.48202248982020784, "num_chars": 28}, {"sum_logits": -13.657419204711914, "num_tokens": 4, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -20.572261810302734, "logits_per_token": -3.4143548011779785, "logits_per_char": -0.5938008349874745, "num_chars": 23}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": "7-157", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 33.91008377075195, "incorrect_loss_raw": 23.82569758097331, "correct_loss_per_char": 0.7064600785573324, "incorrect_loss_per_char": 0.8142862916753971, "correct_loss_per_token": 3.3910083770751953, "incorrect_loss_per_token": 4.300212387811571, "correct_loss_uncond": -14.777690887451172, "incorrect_loss_uncond": -6.3099015553792315}, "model_output": [{"sum_logits": -24.408674240112305, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -29.265363693237305, "logits_per_token": -3.486953462873186, "logits_per_char": -0.9040249718560113, "num_chars": 27}, {"sum_logits": -20.148441314697266, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -28.735157012939453, "logits_per_token": -4.0296882629394535, "logits_per_char": -0.610558827718099, "num_chars": 33}, {"sum_logits": -26.91997718811035, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -32.40627670288086, "logits_per_token": -5.383995437622071, "logits_per_char": -0.9282750754520811, "num_chars": 29}, {"sum_logits": -33.91008377075195, "num_tokens": 10, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -48.687774658203125, "logits_per_token": -3.3910083770751953, "logits_per_char": -0.7064600785573324, "num_chars": 48}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": "9-1164", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.2068095207214355, "incorrect_loss_raw": 6.866523901621501, "correct_loss_per_char": 0.7011349201202393, "incorrect_loss_per_char": 1.2198560237884521, "correct_loss_per_token": 4.2068095207214355, "incorrect_loss_per_token": 6.866523901621501, "correct_loss_uncond": -5.682182788848877, "incorrect_loss_uncond": -6.830181916554769}, "model_output": [{"sum_logits": -3.0397610664367676, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.455323219299316, "logits_per_token": -3.0397610664367676, "logits_per_char": -0.5066268444061279, "num_chars": 6}, {"sum_logits": -4.2068095207214355, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.2068095207214355, "logits_per_char": -0.7011349201202393, "num_chars": 6}, {"sum_logits": -6.789183616638184, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -14.31518840789795, "logits_per_token": -6.789183616638184, "logits_per_char": -1.3578367233276367, "num_chars": 5}, {"sum_logits": -10.77062702178955, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -14.319605827331543, "logits_per_token": -10.77062702178955, "logits_per_char": -1.7951045036315918, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": "8-63", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.187177658081055, "incorrect_loss_raw": 16.531333287556965, "correct_loss_per_char": 1.0074871063232422, "incorrect_loss_per_char": 0.6203562995016356, "correct_loss_per_token": 4.197862943013509, "incorrect_loss_per_token": 2.7552222145928282, "correct_loss_uncond": -13.289068222045898, "incorrect_loss_uncond": -13.708182652791342}, "model_output": [{"sum_logits": -16.886919021606445, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -27.123836517333984, "logits_per_token": -2.814486503601074, "logits_per_char": -0.6031042507716587, "num_chars": 28}, {"sum_logits": -15.907918930053711, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -34.07218551635742, "logits_per_token": -2.6513198216756186, "logits_per_char": -0.6118430357712966, "num_chars": 26}, {"sum_logits": -16.799161911010742, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -29.522525787353516, "logits_per_token": -2.7998603185017905, "logits_per_char": -0.6461216119619516, "num_chars": 26}, {"sum_logits": -25.187177658081055, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -38.47624588012695, "logits_per_token": -4.197862943013509, "logits_per_char": -1.0074871063232422, "num_chars": 25}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": "8-308", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.99526309967041, "incorrect_loss_raw": 28.601807912190754, "correct_loss_per_char": 0.6664410999843052, "incorrect_loss_per_char": 0.7109300681461379, "correct_loss_per_token": 2.3325438499450684, "incorrect_loss_per_token": 4.552126884460449, "correct_loss_uncond": -19.972220420837402, "incorrect_loss_uncond": -12.39315414428711}, "model_output": [{"sum_logits": -30.116647720336914, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -43.20283508300781, "logits_per_token": -5.019441286722819, "logits_per_char": -0.8365735477871366, "num_chars": 36}, {"sum_logits": -27.069978713989258, "num_tokens": 7, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -36.95491027832031, "logits_per_token": -3.8671398162841797, "logits_per_char": -0.6602433832680307, "num_chars": 41}, {"sum_logits": -28.618797302246094, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -42.82714080810547, "logits_per_token": -4.769799550374349, "logits_per_char": -0.6359732733832465, "num_chars": 45}, {"sum_logits": -13.99526309967041, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -33.96748352050781, "logits_per_token": -2.3325438499450684, "logits_per_char": -0.6664410999843052, "num_chars": 21}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": "326", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.827556133270264, "incorrect_loss_raw": 12.069790363311768, "correct_loss_per_char": 0.8697284592522515, "incorrect_loss_per_char": 1.4247692516871862, "correct_loss_per_token": 3.913778066635132, "incorrect_loss_per_token": 6.034895181655884, "correct_loss_uncond": -5.875047206878662, "incorrect_loss_uncond": -0.8051981925964355}, "model_output": [{"sum_logits": -15.029779434204102, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.65031623840332, "logits_per_token": -7.514889717102051, "logits_per_char": -1.5029779434204102, "num_chars": 10}, {"sum_logits": -14.242263793945312, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.52404499053955, "logits_per_token": -7.121131896972656, "logits_per_char": -1.780282974243164, "num_chars": 8}, {"sum_logits": -7.827556133270264, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.702603340148926, "logits_per_token": -3.913778066635132, "logits_per_char": -0.8697284592522515, "num_chars": 9}, {"sum_logits": -6.937327861785889, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -11.450604438781738, "logits_per_token": -3.4686639308929443, "logits_per_char": -0.991046837397984, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": "1184", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.532155990600586, "incorrect_loss_raw": 15.798929214477539, "correct_loss_per_char": 0.5812862396240235, "incorrect_loss_per_char": 0.7471979390343263, "correct_loss_per_token": 4.844051996866862, "incorrect_loss_per_token": 4.5715684254964195, "correct_loss_uncond": -11.427637100219727, "incorrect_loss_uncond": -11.140516916910807}, "model_output": [{"sum_logits": -14.532155990600586, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -25.959793090820312, "logits_per_token": -4.844051996866862, "logits_per_char": -0.5812862396240235, "num_chars": 25}, {"sum_logits": -15.63167953491211, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.328350067138672, "logits_per_token": -3.1263359069824217, "logits_per_char": -0.5210559844970704, "num_chars": 30}, {"sum_logits": -15.113115310668945, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -19.224063873291016, "logits_per_token": -5.037705103556315, "logits_per_char": -0.795427121614155, "num_chars": 19}, {"sum_logits": -16.651992797851562, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.26592445373535, "logits_per_token": -5.5506642659505205, "logits_per_char": -0.9251107109917535, "num_chars": 18}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": "359", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.916549682617188, "incorrect_loss_raw": 4.207205851872762, "correct_loss_per_char": 1.9833099365234375, "incorrect_loss_per_char": 0.8652784691916572, "correct_loss_per_token": 4.958274841308594, "incorrect_loss_per_token": 4.207205851872762, "correct_loss_uncond": -7.900505065917969, "incorrect_loss_uncond": -7.345258951187134}, "model_output": [{"sum_logits": -9.916549682617188, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -17.817054748535156, "logits_per_token": -4.958274841308594, "logits_per_char": -1.9833099365234375, "num_chars": 5}, {"sum_logits": -3.601369857788086, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -10.948315620422363, "logits_per_token": -3.601369857788086, "logits_per_char": -0.9003424644470215, "num_chars": 4}, {"sum_logits": -3.2566978931427, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -10.185125350952148, "logits_per_token": -3.2566978931427, "logits_per_char": -0.5427829821904501, "num_chars": 6}, {"sum_logits": -5.7635498046875, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.523953437805176, "logits_per_token": -5.7635498046875, "logits_per_char": -1.1527099609375, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": "9-350", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.76396942138672, "incorrect_loss_raw": 31.046122868855793, "correct_loss_per_char": 1.241570336478097, "incorrect_loss_per_char": 1.0469944965218794, "correct_loss_per_token": 5.793994903564453, "incorrect_loss_per_token": 4.887853365095835, "correct_loss_uncond": -1.3524894714355469, "incorrect_loss_uncond": -8.573232650756836}, "model_output": [{"sum_logits": -36.099056243896484, "num_tokens": 7, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -44.6673583984375, "logits_per_token": -5.157008034842355, "logits_per_char": -1.3884252401498647, "num_chars": 26}, {"sum_logits": -25.44351577758789, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -35.54996109008789, "logits_per_token": -4.240585962931315, "logits_per_char": -0.7951098680496216, "num_chars": 32}, {"sum_logits": -31.595796585083008, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -38.6407470703125, "logits_per_token": -5.265966097513835, "logits_per_char": -0.9574483813661517, "num_chars": 33}, {"sum_logits": -34.76396942138672, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -36.116458892822266, "logits_per_token": -5.793994903564453, "logits_per_char": -1.241570336478097, "num_chars": 28}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": "7-140", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.593976974487305, "incorrect_loss_raw": 15.209341049194336, "correct_loss_per_char": 0.810776498582628, "incorrect_loss_per_char": 0.9992800034681485, "correct_loss_per_token": 4.8646589914957685, "incorrect_loss_per_token": 5.724885198805066, "correct_loss_uncond": -5.740140914916992, "incorrect_loss_uncond": -2.8351917266845703}, "model_output": [{"sum_logits": -19.740266799926758, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -20.466951370239258, "logits_per_token": -6.580088933308919, "logits_per_char": -1.09668148888482, "num_chars": 18}, {"sum_logits": -11.791887283325195, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -15.647441864013672, "logits_per_token": -5.895943641662598, "logits_per_char": -1.0719897530295632, "num_chars": 11}, {"sum_logits": -14.593976974487305, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -20.334117889404297, "logits_per_token": -4.8646589914957685, "logits_per_char": -0.810776498582628, "num_chars": 18}, {"sum_logits": -14.095869064331055, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -18.01920509338379, "logits_per_token": -4.698623021443685, "logits_per_char": -0.829168768490062, "num_chars": 17}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": "591", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.948192596435547, "incorrect_loss_raw": 18.170055389404297, "correct_loss_per_char": 0.9979277038574219, "incorrect_loss_per_char": 1.0845312129809335, "correct_loss_per_token": 3.5640275137765065, "incorrect_loss_per_token": 5.28570376502143, "correct_loss_uncond": -7.6083831787109375, "incorrect_loss_uncond": -7.001841862996419}, "model_output": [{"sum_logits": -24.948192596435547, "num_tokens": 7, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -32.556575775146484, "logits_per_token": -3.5640275137765065, "logits_per_char": -0.9979277038574219, "num_chars": 25}, {"sum_logits": -10.933479309082031, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -22.542320251464844, "logits_per_token": -3.6444931030273438, "logits_per_char": -0.7809628077915737, "num_chars": 14}, {"sum_logits": -27.755329132080078, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -30.025888442993164, "logits_per_token": -6.9388322830200195, "logits_per_char": -1.5419627295600042, "num_chars": 18}, {"sum_logits": -15.821357727050781, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -22.94748306274414, "logits_per_token": -5.273785909016927, "logits_per_char": -0.9306681015912224, "num_chars": 17}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": "7-391", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.351526260375977, "incorrect_loss_raw": 8.496049245198568, "correct_loss_per_char": 1.293940782546997, "incorrect_loss_per_char": 1.1282303219749814, "correct_loss_per_token": 5.175763130187988, "incorrect_loss_per_token": 6.350082476933797, "correct_loss_uncond": -7.490331649780273, "incorrect_loss_uncond": -4.410452206929524}, "model_output": [{"sum_logits": -6.956878662109375, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -11.714385986328125, "logits_per_token": -6.956878662109375, "logits_per_char": -1.391375732421875, "num_chars": 5}, {"sum_logits": -9.947402000427246, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -14.044624328613281, "logits_per_token": -9.947402000427246, "logits_per_char": -1.4210574286324638, "num_chars": 7}, {"sum_logits": -10.351526260375977, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -17.84185791015625, "logits_per_token": -5.175763130187988, "logits_per_char": -1.293940782546997, "num_chars": 8}, {"sum_logits": -8.583867073059082, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.960494041442871, "logits_per_token": -2.1459667682647705, "logits_per_char": -0.5722578048706055, "num_chars": 15}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": "1672", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 40.45863342285156, "incorrect_loss_raw": 29.770506540934246, "correct_loss_per_char": 0.9867959371427211, "incorrect_loss_per_char": 0.7772659679790875, "correct_loss_per_token": 4.045863342285156, "incorrect_loss_per_token": 4.185075336032443, "correct_loss_uncond": -7.590339660644531, "incorrect_loss_uncond": -12.226875305175781}, "model_output": [{"sum_logits": -28.423500061035156, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -39.63768768310547, "logits_per_token": -4.737250010172526, "logits_per_char": -0.7682027043523015, "num_chars": 37}, {"sum_logits": -41.94049072265625, "num_tokens": 9, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -47.961029052734375, "logits_per_token": -4.660054524739583, "logits_per_char": -0.9320109049479167, "num_chars": 45}, {"sum_logits": -40.45863342285156, "num_tokens": 10, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -48.048973083496094, "logits_per_token": -4.045863342285156, "logits_per_char": -0.9867959371427211, "num_chars": 41}, {"sum_logits": -18.947528839111328, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -38.393428802490234, "logits_per_token": -3.157921473185221, "logits_per_char": -0.6315842946370442, "num_chars": 30}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": "9-464", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.236952781677246, "incorrect_loss_raw": 11.318425814310709, "correct_loss_per_char": 1.0263280868530273, "incorrect_loss_per_char": 1.4644381477719264, "correct_loss_per_token": 4.618476390838623, "incorrect_loss_per_token": 5.6592129071553545, "correct_loss_uncond": -5.490983963012695, "incorrect_loss_uncond": -0.36307748158772785}, "model_output": [{"sum_logits": -10.500224113464355, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.535147666931152, "logits_per_token": -5.250112056732178, "logits_per_char": -1.5000320162091936, "num_chars": 7}, {"sum_logits": -10.673587799072266, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.3983154296875, "logits_per_token": -5.336793899536133, "logits_per_char": -1.0673587799072266, "num_chars": 10}, {"sum_logits": -9.236952781677246, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.727936744689941, "logits_per_token": -4.618476390838623, "logits_per_char": -1.0263280868530273, "num_chars": 9}, {"sum_logits": -12.781465530395508, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.11104679107666, "logits_per_token": -6.390732765197754, "logits_per_char": -1.8259236471993583, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": "9-983", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.074592590332031, "incorrect_loss_raw": 17.398523330688477, "correct_loss_per_char": 1.1595840454101562, "incorrect_loss_per_char": 1.039816847301665, "correct_loss_per_token": 7.537296295166016, "incorrect_loss_per_token": 7.99517748090956, "correct_loss_uncond": -11.850317001342773, "incorrect_loss_uncond": -5.070838928222656}, "model_output": [{"sum_logits": -22.08172035217285, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -22.506715774536133, "logits_per_token": -11.040860176086426, "logits_per_char": -1.051510492960612, "num_chars": 21}, {"sum_logits": -15.074592590332031, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -26.924909591674805, "logits_per_token": -7.537296295166016, "logits_per_char": -1.1595840454101562, "num_chars": 13}, {"sum_logits": -17.44033432006836, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -24.091915130615234, "logits_per_token": -8.72016716003418, "logits_per_char": -1.162688954671224, "num_chars": 15}, {"sum_logits": -12.673515319824219, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.80945587158203, "logits_per_token": -4.224505106608073, "logits_per_char": -0.9052510942731585, "num_chars": 14}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": "9-179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.371443271636963, "incorrect_loss_raw": 10.325520515441895, "correct_loss_per_char": 0.6701312065124512, "incorrect_loss_per_char": 0.9633813164450905, "correct_loss_per_token": 3.6857216358184814, "incorrect_loss_per_token": 5.162760257720947, "correct_loss_uncond": -13.303475856781006, "incorrect_loss_uncond": -9.068812370300293}, "model_output": [{"sum_logits": -7.371443271636963, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -20.67491912841797, "logits_per_token": -3.6857216358184814, "logits_per_char": -0.6701312065124512, "num_chars": 11}, {"sum_logits": -8.150218963623047, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.258132934570312, "logits_per_token": -4.075109481811523, "logits_per_char": -0.8150218963623047, "num_chars": 10}, {"sum_logits": -11.869871139526367, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -20.22022247314453, "logits_per_token": -5.934935569763184, "logits_per_char": -1.079079194502397, "num_chars": 11}, {"sum_logits": -10.95647144317627, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -20.70464324951172, "logits_per_token": -5.478235721588135, "logits_per_char": -0.99604285847057, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": "7-942", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.381950378417969, "incorrect_loss_raw": 4.896688222885132, "correct_loss_per_char": 0.8202167087131076, "incorrect_loss_per_char": 0.9240469720628526, "correct_loss_per_token": 3.6909751892089844, "incorrect_loss_per_token": 4.896688222885132, "correct_loss_uncond": -9.684270858764648, "incorrect_loss_uncond": -7.455078045527141}, "model_output": [{"sum_logits": -6.197310447692871, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.609890937805176, "logits_per_token": -6.197310447692871, "logits_per_char": -1.2394620895385742, "num_chars": 5}, {"sum_logits": -4.976160526275635, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.893747329711914, "logits_per_token": -4.976160526275635, "logits_per_char": -0.8293600877126058, "num_chars": 6}, {"sum_logits": -7.381950378417969, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.066221237182617, "logits_per_token": -3.6909751892089844, "logits_per_char": -0.8202167087131076, "num_chars": 9}, {"sum_logits": -3.5165936946868896, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.551660537719727, "logits_per_token": -3.5165936946868896, "logits_per_char": -0.703318738937378, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": "7-100", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.349807739257812, "incorrect_loss_raw": 10.565101941426596, "correct_loss_per_char": 1.0145753224690754, "incorrect_loss_per_char": 0.7142265549685866, "correct_loss_per_token": 8.116602579752604, "incorrect_loss_per_token": 5.6063294940524635, "correct_loss_uncond": -1.2955436706542969, "incorrect_loss_uncond": -6.551429430643718}, "model_output": [{"sum_logits": -6.626304626464844, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -14.02598762512207, "logits_per_token": -6.626304626464844, "logits_per_char": -0.7362560696072049, "num_chars": 9}, {"sum_logits": -24.349807739257812, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -25.64535140991211, "logits_per_token": -8.116602579752604, "logits_per_char": -1.0145753224690754, "num_chars": 24}, {"sum_logits": -14.05090045928955, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -19.425966262817383, "logits_per_token": -4.683633486429851, "logits_per_char": -0.8265235564287972, "num_chars": 17}, {"sum_logits": -11.01810073852539, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -17.897640228271484, "logits_per_token": -5.509050369262695, "logits_per_char": -0.5799000388697574, "num_chars": 19}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": "9-30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.492476463317871, "incorrect_loss_raw": 7.4933522542317705, "correct_loss_per_char": 0.9154127438863119, "incorrect_loss_per_char": 1.199505541059706, "correct_loss_per_token": 5.492476463317871, "incorrect_loss_per_token": 6.018723169962565, "correct_loss_uncond": -7.225940704345703, "incorrect_loss_uncond": -5.621739387512207}, "model_output": [{"sum_logits": -7.040496826171875, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.29869270324707, "logits_per_token": -7.040496826171875, "logits_per_char": -1.7601242065429688, "num_chars": 4}, {"sum_logits": -5.492476463317871, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.718417167663574, "logits_per_token": -5.492476463317871, "logits_per_char": -0.9154127438863119, "num_chars": 6}, {"sum_logits": -6.591785430908203, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.131689071655273, "logits_per_token": -6.591785430908203, "logits_per_char": -0.7324206034342448, "num_chars": 9}, {"sum_logits": -8.847774505615234, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.91489315032959, "logits_per_token": -4.423887252807617, "logits_per_char": -1.1059718132019043, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": "1709", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.276756286621094, "incorrect_loss_raw": 19.263128916422527, "correct_loss_per_char": 0.6125462180689761, "incorrect_loss_per_char": 0.5824129256198854, "correct_loss_per_token": 3.8794593811035156, "incorrect_loss_per_token": 3.365252576050935, "correct_loss_uncond": -17.121559143066406, "incorrect_loss_uncond": -14.926840464274088}, "model_output": [{"sum_logits": -23.276756286621094, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -40.3983154296875, "logits_per_token": -3.8794593811035156, "logits_per_char": -0.6125462180689761, "num_chars": 38}, {"sum_logits": -24.75912857055664, "num_tokens": 9, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -38.71324157714844, "logits_per_token": -2.7510142856174045, "logits_per_char": -0.46715336925578566, "num_chars": 53}, {"sum_logits": -14.773836135864258, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -31.309654235839844, "logits_per_token": -3.6934590339660645, "logits_per_char": -0.6715380061756481, "num_chars": 22}, {"sum_logits": -18.25642204284668, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -32.54701232910156, "logits_per_token": -3.651284408569336, "logits_per_char": -0.6085474014282226, "num_chars": 30}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": "8-491", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.13431739807129, "incorrect_loss_raw": 17.42419656117757, "correct_loss_per_char": 0.6510093352373909, "incorrect_loss_per_char": 0.8489679505253397, "correct_loss_per_token": 5.533579349517822, "incorrect_loss_per_token": 4.021658309300741, "correct_loss_uncond": -13.266237258911133, "incorrect_loss_uncond": -10.28326384226481}, "model_output": [{"sum_logits": -17.86623764038086, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -28.74478530883789, "logits_per_token": -4.466559410095215, "logits_per_char": -0.893311882019043, "num_chars": 20}, {"sum_logits": -14.342902183532715, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -29.392303466796875, "logits_per_token": -3.5857255458831787, "logits_per_char": -0.5976209243138632, "num_chars": 24}, {"sum_logits": -20.06344985961914, "num_tokens": 5, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -24.985292434692383, "logits_per_token": -4.0126899719238285, "logits_per_char": -1.0559710452431126, "num_chars": 19}, {"sum_logits": -22.13431739807129, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -35.40055465698242, "logits_per_token": -5.533579349517822, "logits_per_char": -0.6510093352373909, "num_chars": 34}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": "44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.528927803039551, "incorrect_loss_raw": 6.525582631429036, "correct_loss_per_char": 0.932703971862793, "incorrect_loss_per_char": 0.650270981511111, "correct_loss_per_token": 6.528927803039551, "incorrect_loss_per_token": 5.3496778806050616, "correct_loss_uncond": -8.142241477966309, "incorrect_loss_uncond": -9.665872891743978}, "model_output": [{"sum_logits": -6.00762939453125, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.428529739379883, "logits_per_token": -6.00762939453125, "logits_per_char": -0.6675143771701388, "num_chars": 9}, {"sum_logits": -6.528927803039551, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.67116928100586, "logits_per_token": -6.528927803039551, "logits_per_char": -0.932703971862793, "num_chars": 7}, {"sum_logits": -7.055428504943848, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -19.573148727416992, "logits_per_token": -3.527714252471924, "logits_per_char": -0.3527714252471924, "num_chars": 20}, {"sum_logits": -6.513689994812012, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.572688102722168, "logits_per_token": -6.513689994812012, "logits_per_char": -0.9305271421160016, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": "1023", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.0851240158081055, "incorrect_loss_raw": 7.5475161870320635, "correct_loss_per_char": 0.5070936679840088, "incorrect_loss_per_char": 1.351051468319363, "correct_loss_per_token": 6.0851240158081055, "incorrect_loss_per_token": 7.5475161870320635, "correct_loss_uncond": -5.4101152420043945, "incorrect_loss_uncond": -3.8432420094807944}, "model_output": [{"sum_logits": -7.981376647949219, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.559885025024414, "logits_per_token": -7.981376647949219, "logits_per_char": -1.3302294413248699, "num_chars": 6}, {"sum_logits": -8.381889343261719, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.723397254943848, "logits_per_token": -8.381889343261719, "logits_per_char": -1.6763778686523438, "num_chars": 5}, {"sum_logits": -6.279282569885254, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -6.279282569885254, "logits_per_char": -1.0465470949808757, "num_chars": 6}, {"sum_logits": -6.0851240158081055, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.4952392578125, "logits_per_token": -6.0851240158081055, "logits_per_char": -0.5070936679840088, "num_chars": 12}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": "1911", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 31.5498104095459, "incorrect_loss_raw": 21.86576207478841, "correct_loss_per_char": 0.5952794416895453, "incorrect_loss_per_char": 0.5525044193638554, "correct_loss_per_token": 3.9437263011932373, "incorrect_loss_per_token": 3.3172015780494326, "correct_loss_uncond": -18.29018211364746, "incorrect_loss_uncond": -13.050924301147461}, "model_output": [{"sum_logits": -17.09769058227539, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -27.016271591186523, "logits_per_token": -3.419538116455078, "logits_per_char": -0.46209974546690247, "num_chars": 37}, {"sum_logits": -31.5498104095459, "num_tokens": 8, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -49.83999252319336, "logits_per_token": -3.9437263011932373, "logits_per_char": -0.5952794416895453, "num_chars": 53}, {"sum_logits": -26.298561096191406, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.57354736328125, "logits_per_token": -3.7569372994559154, "logits_per_char": -0.6261562165759859, "num_chars": 42}, {"sum_logits": -22.201034545898438, "num_tokens": 8, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -42.160240173339844, "logits_per_token": -2.7751293182373047, "logits_per_char": -0.5692572960486779, "num_chars": 39}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": "429", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.375011920928955, "incorrect_loss_raw": 7.45717175801595, "correct_loss_per_char": 0.6750023841857911, "incorrect_loss_per_char": 1.7210774739583332, "correct_loss_per_token": 3.375011920928955, "incorrect_loss_per_token": 7.45717175801595, "correct_loss_uncond": -6.983108043670654, "incorrect_loss_uncond": -3.459946632385254}, "model_output": [{"sum_logits": -3.375011920928955, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -10.35811996459961, "logits_per_token": -3.375011920928955, "logits_per_char": -0.6750023841857911, "num_chars": 5}, {"sum_logits": -6.7067484855651855, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -10.478487014770508, "logits_per_token": -6.7067484855651855, "logits_per_char": -1.6766871213912964, "num_chars": 4}, {"sum_logits": -8.592927932739258, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -12.29459285736084, "logits_per_token": -8.592927932739258, "logits_per_char": -1.7185855865478517, "num_chars": 5}, {"sum_logits": -7.071838855743408, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -9.978275299072266, "logits_per_token": -7.071838855743408, "logits_per_char": -1.767959713935852, "num_chars": 4}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": "8-49", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.289321899414062, "incorrect_loss_raw": 14.98658561706543, "correct_loss_per_char": 0.41446609497070314, "incorrect_loss_per_char": 0.7109003170749597, "correct_loss_per_token": 2.0723304748535156, "incorrect_loss_per_token": 3.4996439297993978, "correct_loss_uncond": -15.626565933227539, "incorrect_loss_uncond": -11.645780563354492}, "model_output": [{"sum_logits": -17.632062911987305, "num_tokens": 4, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -25.69327735900879, "logits_per_token": -4.408015727996826, "logits_per_char": -0.8816031455993653, "num_chars": 20}, {"sum_logits": -12.507545471191406, "num_tokens": 4, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -28.513010025024414, "logits_per_token": -3.1268863677978516, "logits_per_char": -0.6582918669048109, "num_chars": 19}, {"sum_logits": -14.820148468017578, "num_tokens": 5, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -25.690811157226562, "logits_per_token": -2.9640296936035155, "logits_per_char": -0.5928059387207031, "num_chars": 25}, {"sum_logits": -8.289321899414062, "num_tokens": 4, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -23.9158878326416, "logits_per_token": -2.0723304748535156, "logits_per_char": -0.41446609497070314, "num_chars": 20}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": "520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.134233474731445, "incorrect_loss_raw": 18.3350617090861, "correct_loss_per_char": 1.0515560670332476, "incorrect_loss_per_char": 0.6935645159460279, "correct_loss_per_token": 4.626846694946289, "incorrect_loss_per_token": 3.9565972487131753, "correct_loss_uncond": -9.520292282104492, "incorrect_loss_uncond": -13.36247984568278}, "model_output": [{"sum_logits": -14.341841697692871, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -28.846755981445312, "logits_per_token": -3.5854604244232178, "logits_per_char": -0.5516092960651104, "num_chars": 26}, {"sum_logits": -17.56070899963379, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -31.653438568115234, "logits_per_token": -2.5086727142333984, "logits_per_char": -0.5664744838591544, "num_chars": 31}, {"sum_logits": -23.134233474731445, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -32.65452575683594, "logits_per_token": -4.626846694946289, "logits_per_char": -1.0515560670332476, "num_chars": 22}, {"sum_logits": -23.10263442993164, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -34.592430114746094, "logits_per_token": -5.77565860748291, "logits_per_char": -0.9626097679138184, "num_chars": 24}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": "7-1128", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 21.908090591430664, "incorrect_loss_raw": 26.794514973958332, "correct_loss_per_char": 0.7302696863810222, "incorrect_loss_per_char": 0.9336014263213627, "correct_loss_per_token": 3.651348431905111, "incorrect_loss_per_token": 4.217046207851834, "correct_loss_uncond": -10.079017639160156, "incorrect_loss_uncond": -7.826172510782878}, "model_output": [{"sum_logits": -29.415376663208008, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.24993133544922, "logits_per_token": -4.902562777201335, "logits_per_char": -0.9805125554402669, "num_chars": 30}, {"sum_logits": -19.631175994873047, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.86863136291504, "logits_per_token": -3.2718626658121743, "logits_per_char": -0.7011134283883231, "num_chars": 28}, {"sum_logits": -21.908090591430664, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -31.98710823059082, "logits_per_token": -3.651348431905111, "logits_per_char": -0.7302696863810222, "num_chars": 30}, {"sum_logits": -31.336992263793945, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -39.743499755859375, "logits_per_token": -4.476713180541992, "logits_per_char": -1.119178295135498, "num_chars": 28}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": "7-394", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 16.76919937133789, "incorrect_loss_raw": 19.06590461730957, "correct_loss_per_char": 0.5240374803543091, "incorrect_loss_per_char": 0.5952485967497539, "correct_loss_per_token": 2.7948665618896484, "incorrect_loss_per_token": 2.869573822475615, "correct_loss_uncond": -11.301267623901367, "incorrect_loss_uncond": -8.46852175394694}, "model_output": [{"sum_logits": -15.126039505004883, "num_tokens": 5, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -22.29599380493164, "logits_per_token": -3.0252079010009765, "logits_per_char": -0.6050415802001953, "num_chars": 25}, {"sum_logits": -16.76919937133789, "num_tokens": 6, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -28.070466995239258, "logits_per_token": -2.7948665618896484, "logits_per_char": -0.5240374803543091, "num_chars": 32}, {"sum_logits": -23.896635055541992, "num_tokens": 8, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -34.46636962890625, "logits_per_token": -2.987079381942749, "logits_per_char": -0.6127342321933844, "num_chars": 39}, {"sum_logits": -18.175039291381836, "num_tokens": 7, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -25.84091567993164, "logits_per_token": -2.5964341844831194, "logits_per_char": -0.5679699778556824, "num_chars": 32}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": "9-1166", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.980841636657715, "incorrect_loss_raw": 8.185115496317545, "correct_loss_per_char": 1.1226052045822144, "incorrect_loss_per_char": 1.4868491566370405, "correct_loss_per_token": 8.980841636657715, "incorrect_loss_per_token": 6.7005055745442705, "correct_loss_uncond": -6.4263505935668945, "incorrect_loss_uncond": -5.416880925496419}, "model_output": [{"sum_logits": -8.907659530639648, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.045149803161621, "logits_per_token": -4.453829765319824, "logits_per_char": -1.2725227900913783, "num_chars": 7}, {"sum_logits": -8.980841636657715, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.40719223022461, "logits_per_token": -8.980841636657715, "logits_per_char": -1.1226052045822144, "num_chars": 8}, {"sum_logits": -6.9609222412109375, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.19229507446289, "logits_per_token": -6.9609222412109375, "logits_per_char": -1.7402305603027344, "num_chars": 4}, {"sum_logits": -8.68676471710205, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.568544387817383, "logits_per_token": -8.68676471710205, "logits_per_char": -1.4477941195170085, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": "7-884", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 23.087371826171875, "incorrect_loss_raw": 20.388843218485516, "correct_loss_per_char": 0.6239830223289696, "incorrect_loss_per_char": 1.0096018278541108, "correct_loss_per_token": 2.5652635362413196, "incorrect_loss_per_token": 5.097210804621379, "correct_loss_uncond": -18.16940689086914, "incorrect_loss_uncond": -11.413918495178223}, "model_output": [{"sum_logits": -14.671675682067871, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -30.797454833984375, "logits_per_token": -3.6679189205169678, "logits_per_char": -0.8630397460039925, "num_chars": 17}, {"sum_logits": -22.303016662597656, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -36.65026092529297, "logits_per_token": -5.575754165649414, "logits_per_char": -1.0137734846635298, "num_chars": 22}, {"sum_logits": -23.087371826171875, "num_tokens": 9, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -41.256778717041016, "logits_per_token": -2.5652635362413196, "logits_per_char": -0.6239830223289696, "num_chars": 37}, {"sum_logits": -24.191837310791016, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -27.960569381713867, "logits_per_token": -6.047959327697754, "logits_per_char": -1.1519922528948103, "num_chars": 21}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": "9-501", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.040056228637695, "incorrect_loss_raw": 12.377086321512857, "correct_loss_per_char": 1.0800043252798228, "incorrect_loss_per_char": 0.757068579657036, "correct_loss_per_token": 3.510014057159424, "incorrect_loss_per_token": 3.7685858673519554, "correct_loss_uncond": -3.6720123291015625, "incorrect_loss_uncond": -2.7889633178710938}, "model_output": [{"sum_logits": -14.040056228637695, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.712068557739258, "logits_per_token": -3.510014057159424, "logits_per_char": -1.0800043252798228, "num_chars": 13}, {"sum_logits": -12.855944633483887, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.576082229614258, "logits_per_token": -3.2139861583709717, "logits_per_char": -0.6766286649202046, "num_chars": 19}, {"sum_logits": -15.609882354736328, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.666446685791016, "logits_per_token": -5.203294118245442, "logits_per_char": -0.9756176471710205, "num_chars": 16}, {"sum_logits": -8.66543197631836, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.255620002746582, "logits_per_token": -2.888477325439453, "logits_per_char": -0.6189594268798828, "num_chars": 14}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": "9-757", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.56660270690918, "incorrect_loss_raw": 8.103246927261353, "correct_loss_per_char": 0.956660270690918, "incorrect_loss_per_char": 2.0075866699218747, "correct_loss_per_token": 3.1888675689697266, "incorrect_loss_per_token": 8.103246927261353, "correct_loss_uncond": -10.240127563476562, "incorrect_loss_uncond": -3.8388583660125732}, "model_output": [{"sum_logits": -10.47071361541748, "num_tokens": 1, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -12.066847801208496, "logits_per_token": -10.47071361541748, "logits_per_char": -2.61767840385437, "num_chars": 4}, {"sum_logits": -1.093503713607788, "num_tokens": 1, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -11.327237129211426, "logits_per_token": -1.093503713607788, "logits_per_char": -0.21870074272155762, "num_chars": 5}, {"sum_logits": -9.56660270690918, "num_tokens": 3, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -19.806730270385742, "logits_per_token": -3.1888675689697266, "logits_per_char": -0.956660270690918, "num_chars": 10}, {"sum_logits": -12.745523452758789, "num_tokens": 1, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -12.432230949401855, "logits_per_token": -12.745523452758789, "logits_per_char": -3.1863808631896973, "num_chars": 4}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": "7-725", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 30.742673873901367, "incorrect_loss_raw": 23.509103775024414, "correct_loss_per_char": 0.9041962904088637, "incorrect_loss_per_char": 0.7339361087948668, "correct_loss_per_token": 3.842834234237671, "incorrect_loss_per_token": 3.8600560082329642, "correct_loss_uncond": -15.307985305786133, "incorrect_loss_uncond": -15.078835169474283}, "model_output": [{"sum_logits": -31.307632446289062, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -41.82568359375, "logits_per_token": -4.4725189208984375, "logits_per_char": -0.9208127190085018, "num_chars": 34}, {"sum_logits": -22.088600158691406, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -37.707763671875, "logits_per_token": -3.681433359781901, "logits_per_char": -0.6902687549591064, "num_chars": 32}, {"sum_logits": -17.131078720092773, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -36.230369567871094, "logits_per_token": -3.4262157440185548, "logits_per_char": -0.5907268524169922, "num_chars": 29}, {"sum_logits": -30.742673873901367, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -46.0506591796875, "logits_per_token": -3.842834234237671, "logits_per_char": -0.9041962904088637, "num_chars": 34}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": "1300", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 31.233469009399414, "incorrect_loss_raw": 35.32681147257487, "correct_loss_per_char": 1.1567951484962746, "incorrect_loss_per_char": 0.838925748685364, "correct_loss_per_token": 5.205578168233235, "incorrect_loss_per_token": 4.851330545213487, "correct_loss_uncond": -13.633390426635742, "incorrect_loss_uncond": -9.721781412760416}, "model_output": [{"sum_logits": -34.39894104003906, "num_tokens": 8, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -44.99102783203125, "logits_per_token": -4.299867630004883, "logits_per_char": -0.7999753730241642, "num_chars": 43}, {"sum_logits": -31.354496002197266, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -40.474998474121094, "logits_per_token": -5.225749333699544, "logits_per_char": -0.8958427429199218, "num_chars": 35}, {"sum_logits": -40.22699737548828, "num_tokens": 8, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -49.679752349853516, "logits_per_token": -5.028374671936035, "logits_per_char": -0.8209591301120057, "num_chars": 49}, {"sum_logits": -31.233469009399414, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -44.866859436035156, "logits_per_token": -5.205578168233235, "logits_per_char": -1.1567951484962746, "num_chars": 27}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": "9-230", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 37.133453369140625, "incorrect_loss_raw": 25.37867323557536, "correct_loss_per_char": 0.4473910044474774, "incorrect_loss_per_char": 0.6598116375732843, "correct_loss_per_token": 2.1843207864200367, "incorrect_loss_per_token": 3.1093895789904473, "correct_loss_uncond": -13.298057556152344, "incorrect_loss_uncond": -11.903860410054525}, "model_output": [{"sum_logits": -37.133453369140625, "num_tokens": 17, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -50.43151092529297, "logits_per_token": -2.1843207864200367, "logits_per_char": -0.4473910044474774, "num_chars": 83}, {"sum_logits": -16.743221282958984, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -23.185009002685547, "logits_per_token": -2.790536880493164, "logits_per_char": -0.5773524580330684, "num_chars": 29}, {"sum_logits": -43.39503860473633, "num_tokens": 13, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -58.884178161621094, "logits_per_token": -3.338079892672025, "logits_per_char": -0.735509128893836, "num_chars": 59}, {"sum_logits": -15.997759819030762, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -29.778413772583008, "logits_per_token": -3.1995519638061523, "logits_per_char": -0.6665733257929484, "num_chars": 24}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": "9-988", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.192440032958984, "incorrect_loss_raw": 26.642297108968098, "correct_loss_per_char": 0.5638488006591796, "incorrect_loss_per_char": 0.6864435442991539, "correct_loss_per_token": 2.8192440032958985, "incorrect_loss_per_token": 4.030207556265371, "correct_loss_uncond": -18.591678619384766, "incorrect_loss_uncond": -13.584857940673828}, "model_output": [{"sum_logits": -19.86943817138672, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -34.262176513671875, "logits_per_token": -3.973887634277344, "logits_per_char": -0.5519288380940756, "num_chars": 36}, {"sum_logits": -28.192440032958984, "num_tokens": 10, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -46.78411865234375, "logits_per_token": -2.8192440032958985, "logits_per_char": -0.5638488006591796, "num_chars": 50}, {"sum_logits": -34.071128845214844, "num_tokens": 9, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -47.826690673828125, "logits_per_token": -3.785680982801649, "logits_per_char": -0.6953291601064254, "num_chars": 49}, {"sum_logits": -25.986324310302734, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -38.59259796142578, "logits_per_token": -4.331054051717122, "logits_per_char": -0.8120726346969604, "num_chars": 32}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": "9-393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.66513442993164, "incorrect_loss_raw": 10.985156059265137, "correct_loss_per_char": 0.9665134429931641, "incorrect_loss_per_char": 1.9892854872204009, "correct_loss_per_token": 9.66513442993164, "incorrect_loss_per_token": 10.985156059265137, "correct_loss_uncond": -4.4527435302734375, "incorrect_loss_uncond": -1.9366626739501953}, "model_output": [{"sum_logits": -9.66513442993164, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -14.117877960205078, "logits_per_token": -9.66513442993164, "logits_per_char": -0.9665134429931641, "num_chars": 10}, {"sum_logits": -11.156331062316895, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.276829719543457, "logits_per_token": -11.156331062316895, "logits_per_char": -2.231266212463379, "num_chars": 5}, {"sum_logits": -10.90665054321289, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.892477989196777, "logits_per_token": -10.90665054321289, "logits_per_char": -1.5580929347446986, "num_chars": 7}, {"sum_logits": -10.892486572265625, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.596148490905762, "logits_per_token": -10.892486572265625, "logits_per_char": -2.178497314453125, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": "7-823", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.037076950073242, "incorrect_loss_raw": 15.515055020650228, "correct_loss_per_char": 0.9493198394775391, "incorrect_loss_per_char": 0.7106587447011984, "correct_loss_per_token": 4.5092692375183105, "incorrect_loss_per_token": 3.1840052392747666, "correct_loss_uncond": -8.513988494873047, "incorrect_loss_uncond": -15.512115160624186}, "model_output": [{"sum_logits": -14.867659568786621, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -32.82360076904297, "logits_per_token": -2.477943261464437, "logits_per_char": -0.6194858153661092, "num_chars": 24}, {"sum_logits": -16.906078338623047, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -31.49846839904785, "logits_per_token": -3.3812156677246095, "logits_per_char": -0.7350468842879586, "num_chars": 23}, {"sum_logits": -14.771427154541016, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -28.759441375732422, "logits_per_token": -3.692856788635254, "logits_per_char": -0.7774435344495272, "num_chars": 19}, {"sum_logits": -18.037076950073242, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -26.55106544494629, "logits_per_token": -4.5092692375183105, "logits_per_char": -0.9493198394775391, "num_chars": 19}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": "9-24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.583192825317383, "incorrect_loss_raw": 20.35313542683919, "correct_loss_per_char": 0.8949214271877123, "incorrect_loss_per_char": 1.0900262643022147, "correct_loss_per_token": 5.145798206329346, "incorrect_loss_per_token": 6.784378475613064, "correct_loss_uncond": -5.97490119934082, "incorrect_loss_uncond": -9.347466786702475}, "model_output": [{"sum_logits": -20.583192825317383, "num_tokens": 4, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -26.558094024658203, "logits_per_token": -5.145798206329346, "logits_per_char": -0.8949214271877123, "num_chars": 23}, {"sum_logits": -19.29763412475586, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -27.71743392944336, "logits_per_token": -6.432544708251953, "logits_per_char": -1.072090784708659, "num_chars": 18}, {"sum_logits": -19.940832138061523, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -25.401065826416016, "logits_per_token": -6.646944046020508, "logits_per_char": -1.0495174809506065, "num_chars": 19}, {"sum_logits": -21.820940017700195, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -35.983306884765625, "logits_per_token": -7.2736466725667315, "logits_per_char": -1.1484705272473787, "num_chars": 19}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": "570", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 34.26433563232422, "incorrect_loss_raw": 32.26990509033203, "correct_loss_per_char": 0.6464968987230985, "incorrect_loss_per_char": 1.0287279186447134, "correct_loss_per_token": 3.1149396029385654, "incorrect_loss_per_token": 4.625240674094549, "correct_loss_uncond": -8.213607788085938, "incorrect_loss_uncond": -1.8007367451985676}, "model_output": [{"sum_logits": -30.711139678955078, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -35.36387252807617, "logits_per_token": -4.387305668422154, "logits_per_char": -1.0237046559651692, "num_chars": 30}, {"sum_logits": -36.67230987548828, "num_tokens": 8, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -37.64006042480469, "logits_per_token": -4.584038734436035, "logits_per_char": -1.047780282156808, "num_chars": 35}, {"sum_logits": -29.426265716552734, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -29.207992553710938, "logits_per_token": -4.904377619425456, "logits_per_char": -1.0146988178121632, "num_chars": 29}, {"sum_logits": -34.26433563232422, "num_tokens": 11, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -42.477943420410156, "logits_per_token": -3.1149396029385654, "logits_per_char": -0.6464968987230985, "num_chars": 53}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": "9-124", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 16.707517623901367, "incorrect_loss_raw": 17.139511744181316, "correct_loss_per_char": 0.6683007049560546, "incorrect_loss_per_char": 1.3690271831694103, "correct_loss_per_token": 5.569172541300456, "incorrect_loss_per_token": 10.571696599324545, "correct_loss_uncond": -6.311529159545898, "incorrect_loss_uncond": -2.819888432820638}, "model_output": [{"sum_logits": -19.87774658203125, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -24.424179077148438, "logits_per_token": -9.938873291015625, "logits_per_char": -1.2423591613769531, "num_chars": 16}, {"sum_logits": -16.707517623901367, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -23.019046783447266, "logits_per_token": -5.569172541300456, "logits_per_char": -0.6683007049560546, "num_chars": 25}, {"sum_logits": -12.01164436340332, "num_tokens": 1, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -13.089910507202148, "logits_per_token": -12.01164436340332, "logits_per_char": -1.715949194771903, "num_chars": 7}, {"sum_logits": -19.529144287109375, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -22.364110946655273, "logits_per_token": -9.764572143554688, "logits_per_char": -1.148773193359375, "num_chars": 17}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": "9-199", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.519299507141113, "incorrect_loss_raw": 12.279581705729166, "correct_loss_per_char": 1.5021443896823459, "incorrect_loss_per_char": 1.6143241942874968, "correct_loss_per_token": 6.759649753570557, "incorrect_loss_per_token": 8.5426877339681, "correct_loss_uncond": -4.968966484069824, "incorrect_loss_uncond": -2.3000307083129883}, "model_output": [{"sum_logits": -13.519299507141113, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -18.488265991210938, "logits_per_token": -6.759649753570557, "logits_per_char": -1.5021443896823459, "num_chars": 9}, {"sum_logits": -16.816022872924805, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -19.672626495361328, "logits_per_token": -5.605340957641602, "logits_per_char": -1.2011444909232003, "num_chars": 14}, {"sum_logits": -9.141231536865234, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.94001579284668, "logits_per_token": -9.141231536865234, "logits_per_char": -1.8282463073730468, "num_chars": 5}, {"sum_logits": -10.881490707397461, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.126194953918457, "logits_per_token": -10.881490707397461, "logits_per_char": -1.8135817845662434, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": "767", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.77707290649414, "incorrect_loss_raw": 19.075049718221027, "correct_loss_per_char": 1.2518048604329428, "incorrect_loss_per_char": 1.685378565932765, "correct_loss_per_token": 6.259024302164714, "incorrect_loss_per_token": 6.358349906073676, "correct_loss_uncond": -8.32248306274414, "incorrect_loss_uncond": -3.775930404663086}, "model_output": [{"sum_logits": -18.77707290649414, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -27.09955596923828, "logits_per_token": -6.259024302164714, "logits_per_char": -1.2518048604329428, "num_chars": 15}, {"sum_logits": -18.820819854736328, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -21.7705078125, "logits_per_token": -6.273606618245442, "logits_per_char": -1.710983623157848, "num_chars": 11}, {"sum_logits": -19.29187774658203, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -22.932968139648438, "logits_per_token": -6.430625915527344, "logits_per_char": -1.607656478881836, "num_chars": 12}, {"sum_logits": -19.112451553344727, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.849464416503906, "logits_per_token": -6.370817184448242, "logits_per_char": -1.7374955957586116, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": "28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.000489234924316, "incorrect_loss_raw": 5.5075258413950605, "correct_loss_per_char": 0.714355604989188, "incorrect_loss_per_char": 0.7556109660201602, "correct_loss_per_token": 5.000489234924316, "incorrect_loss_per_token": 5.5075258413950605, "correct_loss_uncond": -6.754498481750488, "incorrect_loss_uncond": -8.02664303779602}, "model_output": [{"sum_logits": -8.684674263000488, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.74663257598877, "logits_per_token": -8.684674263000488, "logits_per_char": -1.085584282875061, "num_chars": 8}, {"sum_logits": -3.0016462802886963, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.65502643585205, "logits_per_token": -3.0016462802886963, "logits_per_char": -0.37520578503608704, "num_chars": 8}, {"sum_logits": -4.836256980895996, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -13.200847625732422, "logits_per_token": -4.836256980895996, "logits_per_char": -0.8060428301493326, "num_chars": 6}, {"sum_logits": -5.000489234924316, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -11.754987716674805, "logits_per_token": -5.000489234924316, "logits_per_char": -0.714355604989188, "num_chars": 7}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": "9-1134", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.995741844177246, "incorrect_loss_raw": 10.530687967936197, "correct_loss_per_char": 0.7139815602983747, "incorrect_loss_per_char": 0.6640616787804497, "correct_loss_per_token": 4.997870922088623, "incorrect_loss_per_token": 3.7459765540228953, "correct_loss_uncond": -14.023030281066895, "incorrect_loss_uncond": -10.52122688293457}, "model_output": [{"sum_logits": -10.397857666015625, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.65975570678711, "logits_per_token": -3.4659525553385415, "logits_per_char": -0.6498661041259766, "num_chars": 16}, {"sum_logits": -9.995741844177246, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -24.01877212524414, "logits_per_token": -4.997870922088623, "logits_per_char": -0.7139815602983747, "num_chars": 14}, {"sum_logits": -16.950756072998047, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.301219940185547, "logits_per_token": -5.650252024332683, "logits_per_char": -1.059422254562378, "num_chars": 16}, {"sum_logits": -4.243450164794922, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -21.19476890563965, "logits_per_token": -2.121725082397461, "logits_per_char": -0.2828966776529948, "num_chars": 15}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": "9-1030", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.984134674072266, "incorrect_loss_raw": 13.846307118733725, "correct_loss_per_char": 1.141723905290876, "incorrect_loss_per_char": 0.8479231034866487, "correct_loss_per_token": 7.992067337036133, "incorrect_loss_per_token": 6.923153559366862, "correct_loss_uncond": -14.430774688720703, "incorrect_loss_uncond": -10.875930786132812}, "model_output": [{"sum_logits": -10.630342483520508, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -22.78618812561035, "logits_per_token": -5.315171241760254, "logits_per_char": -0.48319738561456854, "num_chars": 22}, {"sum_logits": -14.542139053344727, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -22.982635498046875, "logits_per_token": -7.271069526672363, "logits_per_char": -0.9694759368896484, "num_chars": 15}, {"sum_logits": -15.984134674072266, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -30.41490936279297, "logits_per_token": -7.992067337036133, "logits_per_char": -1.141723905290876, "num_chars": 14}, {"sum_logits": -16.366439819335938, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -28.397890090942383, "logits_per_token": -8.183219909667969, "logits_per_char": -1.0910959879557292, "num_chars": 15}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": "9-18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.055173873901367, "incorrect_loss_raw": 34.23447799682617, "correct_loss_per_char": 0.627298911412557, "incorrect_loss_per_char": 1.350025458394299, "correct_loss_per_token": 3.763793468475342, "incorrect_loss_per_token": 6.039992989434137, "correct_loss_uncond": -13.754344940185547, "incorrect_loss_uncond": -2.689131418863932}, "model_output": [{"sum_logits": -43.017120361328125, "num_tokens": 6, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -38.127925872802734, "logits_per_token": -7.1695200602213545, "logits_per_char": -1.4833489779768319, "num_chars": 29}, {"sum_logits": -15.055173873901367, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -28.809518814086914, "logits_per_token": -3.763793468475342, "logits_per_char": -0.627298911412557, "num_chars": 24}, {"sum_logits": -29.604114532470703, "num_tokens": 6, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -34.09843063354492, "logits_per_token": -4.934019088745117, "logits_per_char": -1.409719739641462, "num_chars": 21}, {"sum_logits": -30.082199096679688, "num_tokens": 5, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -38.544471740722656, "logits_per_token": -6.016439819335938, "logits_per_char": -1.1570076575646033, "num_chars": 26}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": "8-378", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.5866217613220215, "incorrect_loss_raw": 10.044548352559408, "correct_loss_per_char": 0.24394895412303783, "incorrect_loss_per_char": 0.3214021040228519, "correct_loss_per_token": 1.3173243522644043, "incorrect_loss_per_token": 1.7382872502009075, "correct_loss_uncond": -20.66872262954712, "incorrect_loss_uncond": -16.839524904886883}, "model_output": [{"sum_logits": -8.003232955932617, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -25.08002281188965, "logits_per_token": -1.6006465911865235, "logits_per_char": -0.29641603540491174, "num_chars": 27}, {"sum_logits": -10.824896812438965, "num_tokens": 8, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -28.4658203125, "logits_per_token": -1.3531121015548706, "logits_per_char": -0.2640218734741211, "num_chars": 41}, {"sum_logits": -11.30551528930664, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -27.10637664794922, "logits_per_token": -2.261103057861328, "logits_per_char": -0.4037684031895229, "num_chars": 28}, {"sum_logits": -6.5866217613220215, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -27.25534439086914, "logits_per_token": -1.3173243522644043, "logits_per_char": -0.24394895412303783, "num_chars": 27}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": "7-677", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 24.10003662109375, "incorrect_loss_raw": 24.977765401204426, "correct_loss_per_char": 0.8310357455549569, "incorrect_loss_per_char": 1.0061657750708426, "correct_loss_per_token": 2.1909124200994317, "incorrect_loss_per_token": 3.9279528723822703, "correct_loss_uncond": -10.625984191894531, "incorrect_loss_uncond": -8.210102717081705}, "model_output": [{"sum_logits": -29.611011505126953, "num_tokens": 7, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -38.265167236328125, "logits_per_token": -4.230144500732422, "logits_per_char": -0.9870337168375651, "num_chars": 30}, {"sum_logits": -20.336387634277344, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -29.28061866760254, "logits_per_token": -3.389397939046224, "logits_per_char": -0.7821687551645132, "num_chars": 26}, {"sum_logits": -24.10003662109375, "num_tokens": 11, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -34.72602081298828, "logits_per_token": -2.1909124200994317, "logits_per_char": -0.8310357455549569, "num_chars": 29}, {"sum_logits": -24.985897064208984, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -32.017818450927734, "logits_per_token": -4.164316177368164, "logits_per_char": -1.2492948532104493, "num_chars": 20}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": "9-786", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.335360527038574, "incorrect_loss_raw": 8.348333358764648, "correct_loss_per_char": 1.0304873206398704, "incorrect_loss_per_char": 0.6986460791693793, "correct_loss_per_token": 5.667680263519287, "incorrect_loss_per_token": 2.731453683641222, "correct_loss_uncond": -6.022456169128418, "incorrect_loss_uncond": -4.8956858317057295}, "model_output": [{"sum_logits": -11.335360527038574, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.357816696166992, "logits_per_token": -5.667680263519287, "logits_per_char": -1.0304873206398704, "num_chars": 11}, {"sum_logits": -11.363513946533203, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.237171173095703, "logits_per_token": -2.840878486633301, "logits_per_char": -0.7575675964355468, "num_chars": 15}, {"sum_logits": -8.923563003540039, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -14.620805740356445, "logits_per_token": -2.974521001180013, "logits_per_char": -0.7436302502950033, "num_chars": 12}, {"sum_logits": -4.757923126220703, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -9.874080657958984, "logits_per_token": -2.3789615631103516, "logits_per_char": -0.5947403907775879, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": "9-463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.310054779052734, "incorrect_loss_raw": 14.716461499532064, "correct_loss_per_char": 1.2364324842180525, "incorrect_loss_per_char": 1.122722372758374, "correct_loss_per_token": 5.770018259684245, "incorrect_loss_per_token": 6.3972464137607155, "correct_loss_uncond": -2.3341102600097656, "incorrect_loss_uncond": -3.2275222142537436}, "model_output": [{"sum_logits": -17.297718048095703, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -19.63001251220703, "logits_per_token": -5.765906016031901, "logits_per_char": -1.2355512891496931, "num_chars": 14}, {"sum_logits": -10.468071937561035, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.206134796142578, "logits_per_token": -5.234035968780518, "logits_per_char": -0.8723393281300863, "num_chars": 12}, {"sum_logits": -16.383594512939453, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.995803833007812, "logits_per_token": -8.191797256469727, "logits_per_char": -1.2602765009953425, "num_chars": 13}, {"sum_logits": -17.310054779052734, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -19.6441650390625, "logits_per_token": -5.770018259684245, "logits_per_char": -1.2364324842180525, "num_chars": 14}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": "7-71", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 28.354705810546875, "incorrect_loss_raw": 24.59308624267578, "correct_loss_per_char": 0.6751120431082589, "incorrect_loss_per_char": 0.8674896941522156, "correct_loss_per_token": 3.1505228678385415, "incorrect_loss_per_token": 4.667358875274658, "correct_loss_uncond": -7.9553375244140625, "incorrect_loss_uncond": -9.84177017211914}, "model_output": [{"sum_logits": -28.354705810546875, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -36.31004333496094, "logits_per_token": -3.1505228678385415, "logits_per_char": -0.6751120431082589, "num_chars": 42}, {"sum_logits": -31.550735473632812, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -34.80670166015625, "logits_per_token": -7.887683868408203, "logits_per_char": -1.2620294189453125, "num_chars": 25}, {"sum_logits": -22.16866683959961, "num_tokens": 8, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -39.517391204833984, "logits_per_token": -2.771083354949951, "logits_per_char": -0.6717777830181699, "num_chars": 33}, {"sum_logits": -20.059856414794922, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -28.98047637939453, "logits_per_token": -3.3433094024658203, "logits_per_char": -0.6686618804931641, "num_chars": 30}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": "9-1053", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.473093509674072, "incorrect_loss_raw": 9.026684919993082, "correct_loss_per_char": 1.118273377418518, "incorrect_loss_per_char": 2.0029967705408733, "correct_loss_per_token": 1.4910311698913574, "incorrect_loss_per_token": 3.244970162709554, "correct_loss_uncond": -5.672299861907959, "incorrect_loss_uncond": -4.718286037445068}, "model_output": [{"sum_logits": -7.436429023742676, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -14.397109985351562, "logits_per_token": -3.718214511871338, "logits_per_char": -1.859107255935669, "num_chars": 4}, {"sum_logits": -15.220467567443848, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -17.269790649414062, "logits_per_token": -3.805116891860962, "logits_per_char": -3.0440935134887694, "num_chars": 5}, {"sum_logits": -4.423158168792725, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -9.568012237548828, "logits_per_token": -2.2115790843963623, "logits_per_char": -1.1057895421981812, "num_chars": 4}, {"sum_logits": -4.473093509674072, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -10.145393371582031, "logits_per_token": -1.4910311698913574, "logits_per_char": -1.118273377418518, "num_chars": 4}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": "9-437", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.74317741394043, "incorrect_loss_raw": 16.634592056274414, "correct_loss_per_char": 1.3190654118855794, "incorrect_loss_per_char": 1.045438003540039, "correct_loss_per_token": 5.935794353485107, "incorrect_loss_per_token": 6.022841665479873, "correct_loss_uncond": -1.5655536651611328, "incorrect_loss_uncond": -3.3748855590820312}, "model_output": [{"sum_logits": -19.984600067138672, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -23.336511611938477, "logits_per_token": -4.996150016784668, "logits_per_char": -0.9992300033569336, "num_chars": 20}, {"sum_logits": -23.74317741394043, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -25.308731079101562, "logits_per_token": -5.935794353485107, "logits_per_char": -1.3190654118855794, "num_chars": 18}, {"sum_logits": -18.595897674560547, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.82268524169922, "logits_per_token": -9.297948837280273, "logits_per_char": -1.3282784053257533, "num_chars": 14}, {"sum_logits": -11.323278427124023, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.86923599243164, "logits_per_token": -3.7744261423746743, "logits_per_char": -0.8088056019374302, "num_chars": 14}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": "1787", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.007872581481934, "incorrect_loss_raw": 20.328445434570312, "correct_loss_per_char": 0.7503936290740967, "incorrect_loss_per_char": 0.6399178897618559, "correct_loss_per_token": 3.0015745162963867, "incorrect_loss_per_token": 3.173401968819755, "correct_loss_uncond": -11.094882011413574, "incorrect_loss_uncond": -14.543721516927084}, "model_output": [{"sum_logits": -16.367694854736328, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -36.03413391113281, "logits_per_token": -2.7279491424560547, "logits_per_char": -0.5279901566043976, "num_chars": 31}, {"sum_logits": -15.007872581481934, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -26.102754592895508, "logits_per_token": -3.0015745162963867, "logits_per_char": -0.7503936290740967, "num_chars": 20}, {"sum_logits": -27.0487060546875, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -34.95801544189453, "logits_per_token": -3.864100864955357, "logits_per_char": -0.6597245379192073, "num_chars": 41}, {"sum_logits": -17.56893539428711, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -33.624351501464844, "logits_per_token": -2.9281558990478516, "logits_per_char": -0.7320389747619629, "num_chars": 24}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": "7-107", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.43613338470459, "incorrect_loss_raw": 6.799509366353353, "correct_loss_per_char": 0.443613338470459, "incorrect_loss_per_char": 0.6769411654699417, "correct_loss_per_token": 4.43613338470459, "incorrect_loss_per_token": 4.203291654586792, "correct_loss_uncond": -5.018061637878418, "incorrect_loss_uncond": -5.695568084716797}, "model_output": [{"sum_logits": -4.43613338470459, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -9.454195022583008, "logits_per_token": -4.43613338470459, "logits_per_char": -0.443613338470459, "num_chars": 10}, {"sum_logits": -8.029447555541992, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -16.191654205322266, "logits_per_token": -4.014723777770996, "logits_per_char": -0.8029447555541992, "num_chars": 10}, {"sum_logits": -7.547858715057373, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -10.498824119567871, "logits_per_token": -3.7739293575286865, "logits_per_char": -0.5391327653612409, "num_chars": 14}, {"sum_logits": -4.821221828460693, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -10.794754028320312, "logits_per_token": -4.821221828460693, "logits_per_char": -0.6887459754943848, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": "769", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.31982421875, "incorrect_loss_raw": 6.412067890167236, "correct_loss_per_char": 1.16497802734375, "incorrect_loss_per_char": 0.8813413546635553, "correct_loss_per_token": 9.31982421875, "incorrect_loss_per_token": 4.782711982727051, "correct_loss_uncond": -3.7730712890625, "incorrect_loss_uncond": -8.779208978017172}, "model_output": [{"sum_logits": -5.231091499328613, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.218637466430664, "logits_per_token": -5.231091499328613, "logits_per_char": -1.0462182998657226, "num_chars": 5}, {"sum_logits": -4.228976726531982, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.787039756774902, "logits_per_token": -4.228976726531982, "logits_per_char": -0.8457953453063964, "num_chars": 5}, {"sum_logits": -9.31982421875, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.0928955078125, "logits_per_token": -9.31982421875, "logits_per_char": -1.16497802734375, "num_chars": 8}, {"sum_logits": -9.776135444641113, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -20.568153381347656, "logits_per_token": -4.888067722320557, "logits_per_char": -0.7520104188185471, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": "9-73", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.240467071533203, "incorrect_loss_raw": 7.060332616170247, "correct_loss_per_char": 0.8914952959333148, "incorrect_loss_per_char": 0.95420013155256, "correct_loss_per_token": 6.240467071533203, "incorrect_loss_per_token": 7.060332616170247, "correct_loss_uncond": -4.122844696044922, "incorrect_loss_uncond": -3.4533799489339194}, "model_output": [{"sum_logits": -6.240467071533203, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.363311767578125, "logits_per_token": -6.240467071533203, "logits_per_char": -0.8914952959333148, "num_chars": 7}, {"sum_logits": -9.14236068725586, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.172551155090332, "logits_per_token": -9.14236068725586, "logits_per_char": -1.1427950859069824, "num_chars": 8}, {"sum_logits": -8.168097496032715, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.876677513122559, "logits_per_token": -8.168097496032715, "logits_per_char": -1.1668710708618164, "num_chars": 7}, {"sum_logits": -3.870539665222168, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -9.49190902709961, "logits_per_token": -3.870539665222168, "logits_per_char": -0.5529342378888812, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": "9-1194", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.1307573318481445, "incorrect_loss_raw": 13.62261708577474, "correct_loss_per_char": 1.4261514663696289, "incorrect_loss_per_char": 2.3084955866374663, "correct_loss_per_token": 7.1307573318481445, "incorrect_loss_per_token": 8.880319913228353, "correct_loss_uncond": -5.437801361083984, "incorrect_loss_uncond": -0.11886024475097656}, "model_output": [{"sum_logits": -12.414068222045898, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.99838638305664, "logits_per_token": -12.414068222045898, "logits_per_char": -2.4828136444091795, "num_chars": 5}, {"sum_logits": -15.869572639465332, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.133062362670898, "logits_per_token": -7.934786319732666, "logits_per_char": -2.644928773244222, "num_chars": 6}, {"sum_logits": -12.584210395812988, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.09298324584961, "logits_per_token": -6.292105197906494, "logits_per_char": -1.7977443422589983, "num_chars": 7}, {"sum_logits": -7.1307573318481445, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.568558692932129, "logits_per_token": -7.1307573318481445, "logits_per_char": -1.4261514663696289, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": "9-416", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.141413688659668, "incorrect_loss_raw": 12.74481471379598, "correct_loss_per_char": 0.19633835554122925, "incorrect_loss_per_char": 1.0389529156994508, "correct_loss_per_token": 1.570706844329834, "incorrect_loss_per_token": 6.37240735689799, "correct_loss_uncond": -10.711182594299316, "incorrect_loss_uncond": -4.6655198733011884}, "model_output": [{"sum_logits": -15.460384368896484, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -18.402315139770508, "logits_per_token": -7.730192184448242, "logits_per_char": -1.4054894880814985, "num_chars": 11}, {"sum_logits": -7.110659122467041, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.301005363464355, "logits_per_token": -3.5553295612335205, "logits_per_char": -0.5925549268722534, "num_chars": 12}, {"sum_logits": -15.663400650024414, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.52768325805664, "logits_per_token": -7.831700325012207, "logits_per_char": -1.118814332144601, "num_chars": 14}, {"sum_logits": -3.141413688659668, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": true, "sum_logits_uncond": -13.852596282958984, "logits_per_token": -1.570706844329834, "logits_per_char": -0.19633835554122925, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": "470", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 27.48993682861328, "incorrect_loss_raw": 34.55879465738932, "correct_loss_per_char": 0.6545223054431734, "incorrect_loss_per_char": 0.9450053730598684, "correct_loss_per_token": 3.9271338326590404, "incorrect_loss_per_token": 5.5538912576342385, "correct_loss_uncond": -13.708869934082031, "incorrect_loss_uncond": -8.648053487141928}, "model_output": [{"sum_logits": -27.48993682861328, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -41.19880676269531, "logits_per_token": -3.9271338326590404, "logits_per_char": -0.6545223054431734, "num_chars": 42}, {"sum_logits": -41.71112060546875, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -48.4421501159668, "logits_per_token": -6.951853434244792, "logits_per_char": -1.0427780151367188, "num_chars": 40}, {"sum_logits": -36.0208740234375, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -46.89848709106445, "logits_per_token": -6.00347900390625, "logits_per_char": -1.0291678292410715, "num_chars": 35}, {"sum_logits": -25.94438934326172, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -34.2799072265625, "logits_per_token": -3.706341334751674, "logits_per_char": -0.7630702748018152, "num_chars": 34}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": "1297", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.415694236755371, "incorrect_loss_raw": 7.562299410502116, "correct_loss_per_char": 1.10889955667349, "incorrect_loss_per_char": 1.3418396631876626, "correct_loss_per_token": 7.2078471183776855, "incorrect_loss_per_token": 7.562299410502116, "correct_loss_uncond": -1.6177186965942383, "incorrect_loss_uncond": -4.717764218648274}, "model_output": [{"sum_logits": -7.715387344360352, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.888578414916992, "logits_per_token": -7.715387344360352, "logits_per_char": -1.2858978907267253, "num_chars": 6}, {"sum_logits": -7.33107852935791, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.073330879211426, "logits_per_token": -7.33107852935791, "logits_per_char": -1.4662157058715821, "num_chars": 5}, {"sum_logits": -7.640432357788086, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.878281593322754, "logits_per_token": -7.640432357788086, "logits_per_char": -1.273405392964681, "num_chars": 6}, {"sum_logits": -14.415694236755371, "num_tokens": 2, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -16.03341293334961, "logits_per_token": -7.2078471183776855, "logits_per_char": -1.10889955667349, "num_chars": 13}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": "8-346", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.221088409423828, "incorrect_loss_raw": 4.995375315348308, "correct_loss_per_char": 0.8701814015706381, "incorrect_loss_per_char": 0.8513012280539861, "correct_loss_per_token": 5.221088409423828, "incorrect_loss_per_token": 4.0099711418151855, "correct_loss_uncond": -5.632207870483398, "incorrect_loss_uncond": -9.090335210164389}, "model_output": [{"sum_logits": -5.221088409423828, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -10.853296279907227, "logits_per_token": -5.221088409423828, "logits_per_char": -0.8701814015706381, "num_chars": 6}, {"sum_logits": -5.9124250411987305, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -12.808022499084473, "logits_per_token": -2.9562125205993652, "logits_per_char": -0.9854041735331217, "num_chars": 6}, {"sum_logits": -4.309211730957031, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -14.756386756896973, "logits_per_token": -4.309211730957031, "logits_per_char": -0.6156016758510044, "num_chars": 7}, {"sum_logits": -4.76448917388916, "num_tokens": 1, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -14.69272232055664, "logits_per_token": -4.76448917388916, "logits_per_char": -0.9528978347778321, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": "7-807", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.818320274353027, "incorrect_loss_raw": 5.918870846430461, "correct_loss_per_char": 1.1363867123921711, "incorrect_loss_per_char": 0.906453635957506, "correct_loss_per_token": 6.818320274353027, "incorrect_loss_per_token": 5.918870846430461, "correct_loss_uncond": -6.059961318969727, "incorrect_loss_uncond": -4.9751730759938555}, "model_output": [{"sum_logits": -6.818320274353027, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.878281593322754, "logits_per_token": -6.818320274353027, "logits_per_char": -1.1363867123921711, "num_chars": 6}, {"sum_logits": -10.502832412719727, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.620548248291016, "logits_per_token": -10.502832412719727, "logits_per_char": -1.1669813791910808, "num_chars": 9}, {"sum_logits": -3.1327860355377197, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -3.1327860355377197, "logits_per_char": -0.5221310059229533, "num_chars": 6}, {"sum_logits": -4.1209940910339355, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -4.1209940910339355, "logits_per_char": -1.0302485227584839, "num_chars": 4}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": "8-463", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.464434623718262, "incorrect_loss_raw": 8.536751588185629, "correct_loss_per_char": 0.28760182230096115, "incorrect_loss_per_char": 0.5029616687032912, "correct_loss_per_token": 1.8214782079060872, "incorrect_loss_per_token": 2.8455838627285424, "correct_loss_uncond": -11.439213752746582, "incorrect_loss_uncond": -9.59607489903768}, "model_output": [{"sum_logits": -5.464434623718262, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.903648376464844, "logits_per_token": -1.8214782079060872, "logits_per_char": -0.28760182230096115, "num_chars": 19}, {"sum_logits": -12.3974027633667, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -22.137248992919922, "logits_per_token": -4.132467587788899, "logits_per_char": -0.7748376727104187, "num_chars": 16}, {"sum_logits": -6.769327640533447, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.389434814453125, "logits_per_token": -2.2564425468444824, "logits_per_char": -0.37607375780741376, "num_chars": 18}, {"sum_logits": -6.443524360656738, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -15.871795654296875, "logits_per_token": -2.147841453552246, "logits_per_char": -0.357973575592041, "num_chars": 18}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": "9-110", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8541970252990723, "incorrect_loss_raw": 7.619132041931152, "correct_loss_per_char": 0.42824411392211914, "incorrect_loss_per_char": 1.2244737969504462, "correct_loss_per_token": 3.8541970252990723, "incorrect_loss_per_token": 7.619132041931152, "correct_loss_uncond": -8.617974758148193, "incorrect_loss_uncond": -4.867012977600098}, "model_output": [{"sum_logits": -3.8541970252990723, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.472171783447266, "logits_per_token": -3.8541970252990723, "logits_per_char": -0.42824411392211914, "num_chars": 9}, {"sum_logits": -7.889637470245361, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.679478645324707, "logits_per_token": -7.889637470245361, "logits_per_char": -1.3149395783742268, "num_chars": 6}, {"sum_logits": -6.500159740447998, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.072437286376953, "logits_per_token": -6.500159740447998, "logits_per_char": -1.3000319480895997, "num_chars": 5}, {"sum_logits": -8.467598915100098, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.70651912689209, "logits_per_token": -8.467598915100098, "logits_per_char": -1.0584498643875122, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": "1611", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.21717643737793, "incorrect_loss_raw": 13.718366622924805, "correct_loss_per_char": 0.4886870574951172, "incorrect_loss_per_char": 0.650002587871787, "correct_loss_per_token": 2.443435287475586, "incorrect_loss_per_token": 3.429591655731201, "correct_loss_uncond": -16.99836540222168, "incorrect_loss_uncond": -13.028786341349283}, "model_output": [{"sum_logits": -14.736318588256836, "num_tokens": 4, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -30.425174713134766, "logits_per_token": -3.684079647064209, "logits_per_char": -0.5457895773428457, "num_chars": 27}, {"sum_logits": -11.428539276123047, "num_tokens": 4, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -23.98649024963379, "logits_per_token": -2.8571348190307617, "logits_per_char": -0.5714269638061523, "num_chars": 20}, {"sum_logits": -12.21717643737793, "num_tokens": 5, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -29.21554183959961, "logits_per_token": -2.443435287475586, "logits_per_char": -0.4886870574951172, "num_chars": 25}, {"sum_logits": -14.990242004394531, "num_tokens": 4, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -25.82979393005371, "logits_per_token": -3.747560501098633, "logits_per_char": -0.8327912224663628, "num_chars": 18}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": "9-942", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.173284530639648, "incorrect_loss_raw": 11.617956479390463, "correct_loss_per_char": 0.9311070442199707, "incorrect_loss_per_char": 2.081870496840704, "correct_loss_per_token": 11.173284530639648, "incorrect_loss_per_token": 11.617956479390463, "correct_loss_uncond": -2.3971662521362305, "incorrect_loss_uncond": -0.731230099995931}, "model_output": [{"sum_logits": -10.619020462036133, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.200215339660645, "logits_per_token": -10.619020462036133, "logits_per_char": -2.1238040924072266, "num_chars": 5}, {"sum_logits": -12.69034194946289, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.13295841217041, "logits_per_token": -12.69034194946289, "logits_per_char": -1.812905992780413, "num_chars": 7}, {"sum_logits": -11.544507026672363, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.714385986328125, "logits_per_token": -11.544507026672363, "logits_per_char": -2.3089014053344727, "num_chars": 5}, {"sum_logits": -11.173284530639648, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.570450782775879, "logits_per_token": -11.173284530639648, "logits_per_char": -0.9311070442199707, "num_chars": 12}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 152, "native_id": "9-1102", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.367334365844727, "incorrect_loss_raw": 13.886483828226725, "correct_loss_per_char": 1.0204074647691515, "incorrect_loss_per_char": 1.7012647484976149, "correct_loss_per_token": 6.122444788614909, "incorrect_loss_per_token": 6.1521896786159935, "correct_loss_uncond": -2.592702865600586, "incorrect_loss_uncond": 1.0642814636230469}, "model_output": [{"sum_logits": -14.238940238952637, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -15.102205276489258, "logits_per_token": -4.746313412984212, "logits_per_char": -1.186578353246053, "num_chars": 12}, {"sum_logits": -18.367334365844727, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -20.960037231445312, "logits_per_token": -6.122444788614909, "logits_per_char": -1.0204074647691515, "num_chars": 18}, {"sum_logits": -12.943916320800781, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.282198905944824, "logits_per_token": -6.471958160400391, "logits_per_char": -1.8491309029715401, "num_chars": 7}, {"sum_logits": -14.476594924926758, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.082202911376953, "logits_per_token": -7.238297462463379, "logits_per_char": -2.0680849892752513, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 153, "native_id": "9-774", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.501522064208984, "incorrect_loss_raw": 22.481629689534504, "correct_loss_per_char": 0.46779103432932206, "incorrect_loss_per_char": 0.868980202623593, "correct_loss_per_token": 2.4169203440348306, "incorrect_loss_per_token": 3.5068531884087455, "correct_loss_uncond": -16.82219696044922, "incorrect_loss_uncond": -7.103143692016602}, "model_output": [{"sum_logits": -29.00536346435547, "num_tokens": 8, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -32.80734634399414, "logits_per_token": -3.6256704330444336, "logits_per_char": -0.9356568859469506, "num_chars": 31}, {"sum_logits": -14.649045944213867, "num_tokens": 5, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -25.616445541381836, "logits_per_token": -2.9298091888427735, "logits_per_char": -0.6369150410527769, "num_chars": 23}, {"sum_logits": -14.501522064208984, "num_tokens": 6, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -31.323719024658203, "logits_per_token": -2.4169203440348306, "logits_per_char": -0.46779103432932206, "num_chars": 31}, {"sum_logits": -23.79047966003418, "num_tokens": 6, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -30.330528259277344, "logits_per_token": -3.96507994333903, "logits_per_char": -1.0343686808710513, "num_chars": 23}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 154, "native_id": "8-333", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.397793769836426, "incorrect_loss_raw": 10.36994743347168, "correct_loss_per_char": 0.5598529179890951, "incorrect_loss_per_char": 0.7237930252438499, "correct_loss_per_token": 2.799264589945475, "incorrect_loss_per_token": 3.45664914449056, "correct_loss_uncond": -11.346674919128418, "incorrect_loss_uncond": -11.034451802571615}, "model_output": [{"sum_logits": -8.397793769836426, "num_tokens": 3, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -19.744468688964844, "logits_per_token": -2.799264589945475, "logits_per_char": -0.5598529179890951, "num_chars": 15}, {"sum_logits": -6.123266220092773, "num_tokens": 3, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -17.33999252319336, "logits_per_token": -2.0410887400309243, "logits_per_char": -0.43737615857805523, "num_chars": 14}, {"sum_logits": -10.658028602600098, "num_tokens": 3, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -20.65037727355957, "logits_per_token": -3.552676200866699, "logits_per_char": -0.7105352401733398, "num_chars": 15}, {"sum_logits": -14.328547477722168, "num_tokens": 3, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -26.222827911376953, "logits_per_token": -4.776182492574056, "logits_per_char": -1.023467676980155, "num_chars": 14}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 155, "native_id": "9-573", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.240975379943848, "incorrect_loss_raw": 13.622042338053385, "correct_loss_per_char": 0.5823305977715386, "incorrect_loss_per_char": 1.0359376728025256, "correct_loss_per_token": 5.240975379943848, "incorrect_loss_per_token": 6.811021169026692, "correct_loss_uncond": -9.309542655944824, "incorrect_loss_uncond": -4.84403928120931}, "model_output": [{"sum_logits": -9.246068954467773, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.747516632080078, "logits_per_token": -4.623034477233887, "logits_per_char": -0.7112360734205979, "num_chars": 13}, {"sum_logits": -5.240975379943848, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.550518035888672, "logits_per_token": -5.240975379943848, "logits_per_char": -0.5823305977715386, "num_chars": 9}, {"sum_logits": -14.305673599243164, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -16.90858268737793, "logits_per_token": -7.152836799621582, "logits_per_char": -0.9537115732828776, "num_chars": 15}, {"sum_logits": -17.31438446044922, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -20.742145538330078, "logits_per_token": -8.65719223022461, "logits_per_char": -1.4428653717041016, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 156, "native_id": "1955", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.57341766357422, "incorrect_loss_raw": 22.519742329915363, "correct_loss_per_char": 0.8155590693155924, "incorrect_loss_per_char": 0.8417671213246355, "correct_loss_per_token": 4.893354415893555, "incorrect_loss_per_token": 4.852713457743327, "correct_loss_uncond": -10.785900115966797, "incorrect_loss_uncond": -7.092947006225586}, "model_output": [{"sum_logits": -19.57341766357422, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -30.359317779541016, "logits_per_token": -4.893354415893555, "logits_per_char": -0.8155590693155924, "num_chars": 24}, {"sum_logits": -18.460399627685547, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -26.793691635131836, "logits_per_token": -3.6920799255371093, "logits_per_char": -0.7691833178202311, "num_chars": 24}, {"sum_logits": -20.925899505615234, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -29.23074722290039, "logits_per_token": -5.231474876403809, "logits_per_char": -0.9511772502552379, "num_chars": 22}, {"sum_logits": -28.172927856445312, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -32.813629150390625, "logits_per_token": -5.634585571289063, "logits_per_char": -0.8049407958984375, "num_chars": 35}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 157, "native_id": "8-45", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.248490333557129, "incorrect_loss_raw": 14.5757048924764, "correct_loss_per_char": 1.6069271905081612, "incorrect_loss_per_char": 1.902084952309018, "correct_loss_per_token": 2.2496980667114257, "incorrect_loss_per_token": 2.91514097849528, "correct_loss_uncond": -4.884251594543457, "incorrect_loss_uncond": -0.8012822469075521}, "model_output": [{"sum_logits": -13.460469245910645, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -14.778972625732422, "logits_per_token": -2.692093849182129, "logits_per_char": -1.922924177987235, "num_chars": 7}, {"sum_logits": -15.226694107055664, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.062117576599121, "logits_per_token": -3.045338821411133, "logits_per_char": -1.903336763381958, "num_chars": 8}, {"sum_logits": -11.248490333557129, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.132741928100586, "logits_per_token": -2.2496980667114257, "logits_per_char": -1.6069271905081612, "num_chars": 7}, {"sum_logits": -15.03995132446289, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.289871215820312, "logits_per_token": -3.007990264892578, "logits_per_char": -1.8799939155578613, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 158, "native_id": "9-674", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.74431037902832, "incorrect_loss_raw": 27.009109497070312, "correct_loss_per_char": 0.8925862085251581, "incorrect_loss_per_char": 1.0767749767239676, "correct_loss_per_token": 3.1240517298380532, "incorrect_loss_per_token": 5.837948767344156, "correct_loss_uncond": -7.896648406982422, "incorrect_loss_uncond": -6.002227783203125}, "model_output": [{"sum_logits": -32.157798767089844, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -36.97087860107422, "logits_per_token": -6.431559753417969, "logits_per_char": -1.2368384141188402, "num_chars": 26}, {"sum_logits": -22.70191764831543, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -30.058605194091797, "logits_per_token": -4.540383529663086, "logits_per_char": -0.9870398977528447, "num_chars": 23}, {"sum_logits": -18.74431037902832, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -26.640958786010742, "logits_per_token": -3.1240517298380532, "logits_per_char": -0.8925862085251581, "num_chars": 21}, {"sum_logits": -26.167612075805664, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -32.0045280456543, "logits_per_token": -6.541903018951416, "logits_per_char": -1.0064466183002179, "num_chars": 26}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 159, "native_id": "898", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.3525397777557373, "incorrect_loss_raw": 4.056685288747151, "correct_loss_per_char": 0.39208996295928955, "incorrect_loss_per_char": 0.5251466516464475, "correct_loss_per_token": 2.3525397777557373, "incorrect_loss_per_token": 3.4710518519083657, "correct_loss_uncond": -9.54381012916565, "incorrect_loss_uncond": -7.449708779652913}, "model_output": [{"sum_logits": -3.513800621032715, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.253585815429688, "logits_per_token": -1.7569003105163574, "logits_per_char": -0.23425337473551433, "num_chars": 15}, {"sum_logits": -2.3525397777557373, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.896349906921387, "logits_per_token": -2.3525397777557373, "logits_per_char": -0.39208996295928955, "num_chars": 6}, {"sum_logits": -4.263950347900391, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.376604080200195, "logits_per_token": -4.263950347900391, "logits_per_char": -0.6091357639857701, "num_chars": 7}, {"sum_logits": -4.39230489730835, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.39230489730835, "logits_per_char": -0.7320508162180582, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 160, "native_id": "7-1159", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.396766662597656, "incorrect_loss_raw": 17.2344913482666, "correct_loss_per_char": 0.46182777768089656, "incorrect_loss_per_char": 0.44737245112717533, "correct_loss_per_token": 2.7709666660853793, "incorrect_loss_per_token": 2.4351273188515314, "correct_loss_uncond": -19.89190673828125, "incorrect_loss_uncond": -17.384461720784504}, "model_output": [{"sum_logits": -20.104751586914062, "num_tokens": 8, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.53852462768555, "logits_per_token": -2.513093948364258, "logits_per_char": -0.5155064509465144, "num_chars": 39}, {"sum_logits": -19.914960861206055, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -39.443458557128906, "logits_per_token": -2.8449944087437222, "logits_per_char": -0.42372257151502246, "num_chars": 47}, {"sum_logits": -19.396766662597656, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -39.288673400878906, "logits_per_token": -2.7709666660853793, "logits_per_char": -0.46182777768089656, "num_chars": 42}, {"sum_logits": -11.683761596679688, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -28.874876022338867, "logits_per_token": -1.9472935994466145, "logits_per_char": -0.4028883309199892, "num_chars": 29}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 161, "native_id": "568", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.766572952270508, "incorrect_loss_raw": 15.36303424835205, "correct_loss_per_char": 0.8766572952270508, "incorrect_loss_per_char": 1.730235128932529, "correct_loss_per_token": 2.9221909840901694, "incorrect_loss_per_token": 7.681517124176025, "correct_loss_uncond": -5.808135986328125, "incorrect_loss_uncond": 0.2052148183186849}, "model_output": [{"sum_logits": -8.766572952270508, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -14.574708938598633, "logits_per_token": -2.9221909840901694, "logits_per_char": -0.8766572952270508, "num_chars": 10}, {"sum_logits": -13.688858032226562, "num_tokens": 2, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.82774829864502, "logits_per_token": -6.844429016113281, "logits_per_char": -1.3688858032226563, "num_chars": 10}, {"sum_logits": -16.43119239807129, "num_tokens": 2, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -14.898624420166016, "logits_per_token": -8.215596199035645, "logits_per_char": -1.8256880442301433, "num_chars": 9}, {"sum_logits": -15.9690523147583, "num_tokens": 2, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -16.747085571289062, "logits_per_token": -7.98452615737915, "logits_per_char": -1.9961315393447876, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 162, "native_id": "9-877", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.764870643615723, "incorrect_loss_raw": 7.125667174657186, "correct_loss_per_char": 1.1960967381795247, "incorrect_loss_per_char": 1.1044076748224565, "correct_loss_per_token": 5.382435321807861, "incorrect_loss_per_token": 4.36004994975196, "correct_loss_uncond": -12.112479209899902, "incorrect_loss_uncond": -8.280530055363974}, "model_output": [{"sum_logits": -9.602861404418945, "num_tokens": 3, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -16.238231658935547, "logits_per_token": -3.200953801472982, "logits_per_char": -1.3718373434884208, "num_chars": 7}, {"sum_logits": -10.764870643615723, "num_tokens": 2, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -22.877349853515625, "logits_per_token": -5.382435321807861, "logits_per_char": -1.1960967381795247, "num_chars": 9}, {"sum_logits": -7.984251976013184, "num_tokens": 1, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -14.048233032226562, "logits_per_token": -7.984251976013184, "logits_per_char": -1.5968503952026367, "num_chars": 5}, {"sum_logits": -3.7898881435394287, "num_tokens": 2, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -15.932126998901367, "logits_per_token": -1.8949440717697144, "logits_per_char": -0.3445352857763117, "num_chars": 11}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 163, "native_id": "406", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 27.711328506469727, "incorrect_loss_raw": 29.43298085530599, "correct_loss_per_char": 0.7697591251797147, "incorrect_loss_per_char": 0.7292951113217837, "correct_loss_per_token": 3.0790365007188587, "incorrect_loss_per_token": 4.407552779666961, "correct_loss_uncond": -9.571748733520508, "incorrect_loss_uncond": -4.672737757364909}, "model_output": [{"sum_logits": -25.557994842529297, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -31.752256393432617, "logits_per_token": -4.259665807088216, "logits_per_char": -0.7302284240722656, "num_chars": 35}, {"sum_logits": -33.425865173339844, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.471073150634766, "logits_per_token": -4.775123596191406, "logits_per_char": -0.7596787539395419, "num_chars": 44}, {"sum_logits": -27.711328506469727, "num_tokens": 9, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -37.283077239990234, "logits_per_token": -3.0790365007188587, "logits_per_char": -0.7697591251797147, "num_chars": 36}, {"sum_logits": -29.315082550048828, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -34.09382629394531, "logits_per_token": -4.187868935721261, "logits_per_char": -0.6979781559535435, "num_chars": 42}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 164, "native_id": "7-1132", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.313624382019043, "incorrect_loss_raw": 10.080571174621582, "correct_loss_per_char": 0.664874943819913, "incorrect_loss_per_char": 3.3080656925837197, "correct_loss_per_token": 3.6568121910095215, "incorrect_loss_per_token": 10.080571174621582, "correct_loss_uncond": -9.432965278625488, "incorrect_loss_uncond": -0.09073082605997722}, "model_output": [{"sum_logits": -7.313624382019043, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.74658966064453, "logits_per_token": -3.6568121910095215, "logits_per_char": -0.664874943819913, "num_chars": 11}, {"sum_logits": -11.401926040649414, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -9.90826416015625, "logits_per_token": -11.401926040649414, "logits_per_char": -3.8006420135498047, "num_chars": 3}, {"sum_logits": -8.790236473083496, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.185486793518066, "logits_per_token": -8.790236473083496, "logits_per_char": -1.098779559135437, "num_chars": 8}, {"sum_logits": -10.049551010131836, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -6.420155048370361, "logits_per_token": -10.049551010131836, "logits_per_char": -5.024775505065918, "num_chars": 2}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 165, "native_id": "7-479", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 22.885883331298828, "incorrect_loss_raw": 19.417699813842773, "correct_loss_per_char": 0.6538823808942522, "incorrect_loss_per_char": 0.7730728873568307, "correct_loss_per_token": 3.2694119044712613, "incorrect_loss_per_token": 3.6352841513497487, "correct_loss_uncond": -15.695430755615234, "incorrect_loss_uncond": -7.3760732014973955}, "model_output": [{"sum_logits": -22.096538543701172, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.679203033447266, "logits_per_token": -3.1566483633858815, "logits_per_char": -0.597203744424356, "num_chars": 37}, {"sum_logits": -22.590660095214844, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -31.933809280395508, "logits_per_token": -3.2272371564592635, "logits_per_char": -0.7530220031738282, "num_chars": 30}, {"sum_logits": -22.885883331298828, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -38.58131408691406, "logits_per_token": -3.2694119044712613, "logits_per_char": -0.6538823808942522, "num_chars": 35}, {"sum_logits": -13.565900802612305, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -4.521966934204102, "logits_per_char": -0.9689929144723075, "num_chars": 14}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 166, "native_id": "609", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.874186515808105, "incorrect_loss_raw": 11.116172154744467, "correct_loss_per_char": 1.3874186515808105, "incorrect_loss_per_char": 1.4498252073923747, "correct_loss_per_token": 4.624728838602702, "incorrect_loss_per_token": 7.136648337046306, "correct_loss_uncond": -0.23677444458007812, "incorrect_loss_uncond": -0.5267512003580729}, "model_output": [{"sum_logits": -13.874186515808105, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -14.110960960388184, "logits_per_token": -4.624728838602702, "logits_per_char": -1.3874186515808105, "num_chars": 10}, {"sum_logits": -9.471373558044434, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -10.754232406616211, "logits_per_token": -9.471373558044434, "logits_per_char": -0.9471373558044434, "num_chars": 10}, {"sum_logits": -10.024689674377441, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.818822860717773, "logits_per_token": -5.012344837188721, "logits_per_char": -1.6707816123962402, "num_chars": 6}, {"sum_logits": -13.852453231811523, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.355714797973633, "logits_per_token": -6.926226615905762, "logits_per_char": -1.7315566539764404, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 167, "native_id": "1568", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.03794288635254, "incorrect_loss_raw": 12.553531010945639, "correct_loss_per_char": 1.5864952405293782, "incorrect_loss_per_char": 1.4271626262874395, "correct_loss_per_token": 6.345980962117513, "incorrect_loss_per_token": 6.856800079345703, "correct_loss_uncond": -2.9767398834228516, "incorrect_loss_uncond": -0.2378390630086263}, "model_output": [{"sum_logits": -11.32065200805664, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -10.857248306274414, "logits_per_token": -11.32065200805664, "logits_per_char": -1.6172360011509486, "num_chars": 7}, {"sum_logits": -13.067407608032227, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.660751342773438, "logits_per_token": -2.6134815216064453, "logits_per_char": -1.0051852006178637, "num_chars": 13}, {"sum_logits": -19.03794288635254, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -22.01468276977539, "logits_per_token": -6.345980962117513, "logits_per_char": -1.5864952405293782, "num_chars": 12}, {"sum_logits": -13.272533416748047, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.856110572814941, "logits_per_token": -6.636266708374023, "logits_per_char": -1.6590666770935059, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 168, "native_id": "9-418", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 58.52959442138672, "incorrect_loss_raw": 33.403280893961586, "correct_loss_per_char": 0.9920270240913003, "incorrect_loss_per_char": 0.9233895540528536, "correct_loss_per_token": 4.877466201782227, "incorrect_loss_per_token": 3.8583548687122486, "correct_loss_uncond": -9.645378112792969, "incorrect_loss_uncond": -11.65579096476237}, "model_output": [{"sum_logits": -32.093772888183594, "num_tokens": 9, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -44.359779357910156, "logits_per_token": -3.5659747653537326, "logits_per_char": -0.9169649396623883, "num_chars": 35}, {"sum_logits": -31.725910186767578, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -44.519798278808594, "logits_per_token": -3.9657387733459473, "logits_per_char": -0.8134848765837841, "num_chars": 39}, {"sum_logits": -58.52959442138672, "num_tokens": 12, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -68.17497253417969, "logits_per_token": -4.877466201782227, "logits_per_char": -0.9920270240913003, "num_chars": 59}, {"sum_logits": -36.390159606933594, "num_tokens": 9, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -46.297637939453125, "logits_per_token": -4.043351067437066, "logits_per_char": -1.0397188459123885, "num_chars": 35}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 169, "native_id": "7-1050", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 38.85136413574219, "incorrect_loss_raw": 17.91333516438802, "correct_loss_per_char": 0.925032479422433, "incorrect_loss_per_char": 0.6523102360007204, "correct_loss_per_token": 5.550194876534598, "incorrect_loss_per_token": 3.809094285964966, "correct_loss_uncond": -12.143280029296875, "incorrect_loss_uncond": -9.9802614847819}, "model_output": [{"sum_logits": -15.143364906311035, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -26.861621856689453, "logits_per_token": -3.028672981262207, "logits_per_char": -0.5608653669004087, "num_chars": 27}, {"sum_logits": -38.85136413574219, "num_tokens": 7, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -50.99464416503906, "logits_per_token": -5.550194876534598, "logits_per_char": -0.925032479422433, "num_chars": 42}, {"sum_logits": -13.5856351852417, "num_tokens": 4, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -23.934913635253906, "logits_per_token": -3.396408796310425, "logits_per_char": -0.7547575102912055, "num_chars": 18}, {"sum_logits": -25.011005401611328, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -32.884254455566406, "logits_per_token": -5.002201080322266, "logits_per_char": -0.6413078308105469, "num_chars": 39}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 170, "native_id": "9-510", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.053380012512207, "incorrect_loss_raw": 7.1861599286397295, "correct_loss_per_char": 1.2933400017874581, "incorrect_loss_per_char": 1.3462082306543985, "correct_loss_per_token": 4.5266900062561035, "incorrect_loss_per_token": 7.1861599286397295, "correct_loss_uncond": -5.646174430847168, "incorrect_loss_uncond": -4.557897726694743}, "model_output": [{"sum_logits": -6.1267218589782715, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -6.1267218589782715, "logits_per_char": -1.5316804647445679, "num_chars": 4}, {"sum_logits": -7.706326484680176, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -10.612653732299805, "logits_per_token": -7.706326484680176, "logits_per_char": -1.5412652969360352, "num_chars": 5}, {"sum_logits": -9.053380012512207, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.699554443359375, "logits_per_token": -4.5266900062561035, "logits_per_char": -1.2933400017874581, "num_chars": 7}, {"sum_logits": -7.725431442260742, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -13.446928024291992, "logits_per_token": -7.725431442260742, "logits_per_char": -0.9656789302825928, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 171, "native_id": "9-519", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6146175861358643, "incorrect_loss_raw": 8.662591616312662, "correct_loss_per_char": 0.4016241762373183, "incorrect_loss_per_char": 1.296098033587138, "correct_loss_per_token": 3.6146175861358643, "incorrect_loss_per_token": 8.662591616312662, "correct_loss_uncond": -10.792908430099487, "incorrect_loss_uncond": -4.6919600168863935}, "model_output": [{"sum_logits": -3.6146175861358643, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.407526016235352, "logits_per_token": -3.6146175861358643, "logits_per_char": -0.4016241762373183, "num_chars": 9}, {"sum_logits": -8.885480880737305, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -8.885480880737305, "logits_per_char": -1.480913480122884, "num_chars": 6}, {"sum_logits": -10.632040977478027, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.442890167236328, "logits_per_token": -10.632040977478027, "logits_per_char": -1.3290051221847534, "num_chars": 8}, {"sum_logits": -6.470252990722656, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.089152336120605, "logits_per_token": -6.470252990722656, "logits_per_char": -1.0783754984537761, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 172, "native_id": "9-637", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.225274085998535, "incorrect_loss_raw": 7.938220024108887, "correct_loss_per_char": 0.5855874648460975, "incorrect_loss_per_char": 0.6766234288960348, "correct_loss_per_token": 2.537545680999756, "incorrect_loss_per_token": 3.4896519978841147, "correct_loss_uncond": -6.886645317077637, "incorrect_loss_uncond": -5.5143248240153}, "model_output": [{"sum_logits": -8.630244255065918, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.885147094726562, "logits_per_token": -2.8767480850219727, "logits_per_char": -0.7845676595514471, "num_chars": 11}, {"sum_logits": -5.524850845336914, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.702034950256348, "logits_per_token": -2.762425422668457, "logits_per_char": -0.5022591677579012, "num_chars": 11}, {"sum_logits": -15.225274085998535, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -22.111919403076172, "logits_per_token": -2.537545680999756, "logits_per_char": -0.5855874648460975, "num_chars": 26}, {"sum_logits": -9.659564971923828, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.770452499389648, "logits_per_token": -4.829782485961914, "logits_per_char": -0.743043459378756, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 173, "native_id": "473", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.42034912109375, "incorrect_loss_raw": 16.528834025065105, "correct_loss_per_char": 0.8011305067274306, "incorrect_loss_per_char": 1.3380141936259948, "correct_loss_per_token": 3.6050872802734375, "incorrect_loss_per_token": 6.237503528594971, "correct_loss_uncond": -10.150579452514648, "incorrect_loss_uncond": -6.009183883666992}, "model_output": [{"sum_logits": -14.42034912109375, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -24.5709285736084, "logits_per_token": -3.6050872802734375, "logits_per_char": -0.8011305067274306, "num_chars": 18}, {"sum_logits": -13.102059364318848, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -17.521345138549805, "logits_per_token": -6.551029682159424, "logits_per_char": -1.0078507203322191, "num_chars": 13}, {"sum_logits": -14.27828311920166, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.58209991455078, "logits_per_token": -4.759427706400554, "logits_per_char": -1.298025738109242, "num_chars": 11}, {"sum_logits": -22.206159591674805, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -26.510608673095703, "logits_per_token": -7.402053197224935, "logits_per_char": -1.7081661224365234, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 174, "native_id": "8-445", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.103947639465332, "incorrect_loss_raw": 10.12037181854248, "correct_loss_per_char": 0.5051973819732666, "incorrect_loss_per_char": 0.6178587636879364, "correct_loss_per_token": 3.367982546488444, "incorrect_loss_per_token": 3.492023918363783, "correct_loss_uncond": -15.267706871032715, "incorrect_loss_uncond": -11.099424044291178}, "model_output": [{"sum_logits": -8.095420837402344, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -21.65674591064453, "logits_per_token": -2.6984736124674478, "logits_per_char": -0.47620122572954965, "num_chars": 17}, {"sum_logits": -10.103947639465332, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -25.371654510498047, "logits_per_token": -3.367982546488444, "logits_per_char": -0.5051973819732666, "num_chars": 20}, {"sum_logits": -8.844697952270508, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.147119522094727, "logits_per_token": -4.422348976135254, "logits_per_char": -0.6317641394478934, "num_chars": 14}, {"sum_logits": -13.42099666595459, "num_tokens": 4, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -23.85552215576172, "logits_per_token": -3.3552491664886475, "logits_per_char": -0.7456109258863661, "num_chars": 18}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 175, "native_id": "9-575", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.108428955078125, "incorrect_loss_raw": 15.079848607381185, "correct_loss_per_char": 0.7425488923725329, "incorrect_loss_per_char": 0.7583340130430279, "correct_loss_per_token": 3.5271072387695312, "incorrect_loss_per_token": 3.7198982026841905, "correct_loss_uncond": -5.241851806640625, "incorrect_loss_uncond": -4.274253209431966}, "model_output": [{"sum_logits": -14.22634506225586, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -17.75646209716797, "logits_per_token": -3.556586265563965, "logits_per_char": -0.6466520482843573, "num_chars": 22}, {"sum_logits": -14.108428955078125, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.35028076171875, "logits_per_token": -3.5271072387695312, "logits_per_char": -0.7425488923725329, "num_chars": 19}, {"sum_logits": -20.509689331054688, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.342998504638672, "logits_per_token": -4.101937866210937, "logits_per_char": -0.8203875732421875, "num_chars": 25}, {"sum_logits": -10.503511428833008, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.5011704762776694, "logits_per_char": -0.8079624176025391, "num_chars": 13}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 176, "native_id": "7-284", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.445472717285156, "incorrect_loss_raw": 18.14625612894694, "correct_loss_per_char": 0.6815157572428385, "incorrect_loss_per_char": 0.7378451947812681, "correct_loss_per_token": 3.407578786214193, "incorrect_loss_per_token": 3.760510508219401, "correct_loss_uncond": -8.532112121582031, "incorrect_loss_uncond": -8.77149772644043}, "model_output": [{"sum_logits": -18.066421508789062, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -30.710145950317383, "logits_per_token": -3.0110702514648438, "logits_per_char": -0.6691267225477431, "num_chars": 27}, {"sum_logits": -20.445472717285156, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -28.977584838867188, "logits_per_token": -3.407578786214193, "logits_per_char": -0.6815157572428385, "num_chars": 30}, {"sum_logits": -19.919837951660156, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -24.592496871948242, "logits_per_token": -4.979959487915039, "logits_per_char": -0.9959918975830078, "num_chars": 20}, {"sum_logits": -16.4525089263916, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -25.450618743896484, "logits_per_token": -3.2905017852783205, "logits_per_char": -0.5484169642130534, "num_chars": 30}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 177, "native_id": "8-135", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 25.92523193359375, "incorrect_loss_raw": 29.991313298543293, "correct_loss_per_char": 0.6647495367588141, "incorrect_loss_per_char": 1.097135026681707, "correct_loss_per_token": 3.7036045619419644, "incorrect_loss_per_token": 5.689094066619873, "correct_loss_uncond": -8.891349792480469, "incorrect_loss_uncond": -7.000864664713542}, "model_output": [{"sum_logits": -25.92523193359375, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -34.81658172607422, "logits_per_token": -3.7036045619419644, "logits_per_char": -0.6647495367588141, "num_chars": 39}, {"sum_logits": -36.479339599609375, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -43.14884567260742, "logits_per_token": -7.295867919921875, "logits_per_char": -1.2579082620554958, "num_chars": 29}, {"sum_logits": -24.676713943481445, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -28.858503341674805, "logits_per_token": -6.169178485870361, "logits_per_char": -1.0729006062383237, "num_chars": 23}, {"sum_logits": -28.817886352539062, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -38.96918487548828, "logits_per_token": -3.602235794067383, "logits_per_char": -0.960596211751302, "num_chars": 30}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 178, "native_id": "397", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 36.936256408691406, "incorrect_loss_raw": 25.51159922281901, "correct_loss_per_char": 1.4774502563476561, "incorrect_loss_per_char": 0.8243270787776384, "correct_loss_per_token": 5.276608058384487, "incorrect_loss_per_token": 4.531155586242676, "correct_loss_uncond": -4.065422058105469, "incorrect_loss_uncond": -12.42177708943685}, "model_output": [{"sum_logits": -22.298202514648438, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -36.378448486328125, "logits_per_token": -4.4596405029296875, "logits_per_char": -0.8576231736403245, "num_chars": 26}, {"sum_logits": -35.40258026123047, "num_tokens": 8, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -44.79181671142578, "logits_per_token": -4.425322532653809, "logits_per_char": -1.1063306331634521, "num_chars": 32}, {"sum_logits": -18.834014892578125, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -32.62986373901367, "logits_per_token": -4.708503723144531, "logits_per_char": -0.5090274295291385, "num_chars": 37}, {"sum_logits": -36.936256408691406, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -41.001678466796875, "logits_per_token": -5.276608058384487, "logits_per_char": -1.4774502563476561, "num_chars": 25}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 179, "native_id": "9-32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.793221473693848, "incorrect_loss_raw": 11.311026573181152, "correct_loss_per_char": 0.929013027864344, "incorrect_loss_per_char": 1.2888996585967047, "correct_loss_per_token": 3.948305368423462, "incorrect_loss_per_token": 10.424364884694418, "correct_loss_uncond": -8.414660453796387, "incorrect_loss_uncond": -3.9161481857299805}, "model_output": [{"sum_logits": -15.336928367614746, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -15.919893264770508, "logits_per_token": -15.336928367614746, "logits_per_char": -1.704103151957194, "num_chars": 9}, {"sum_logits": -15.793221473693848, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -24.207881927490234, "logits_per_token": -3.948305368423462, "logits_per_char": -0.929013027864344, "num_chars": 17}, {"sum_logits": -13.2761812210083, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.480047225952148, "logits_per_token": -13.2761812210083, "logits_per_char": -1.8965973172869002, "num_chars": 7}, {"sum_logits": -5.31997013092041, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -17.281583786010742, "logits_per_token": -2.659985065460205, "logits_per_char": -0.26599850654602053, "num_chars": 20}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 180, "native_id": "48", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.794574737548828, "incorrect_loss_raw": 8.934983253479004, "correct_loss_per_char": 0.48532676696777344, "incorrect_loss_per_char": 1.0860218116215297, "correct_loss_per_token": 3.397287368774414, "incorrect_loss_per_token": 7.3171695073445635, "correct_loss_uncond": -9.38200569152832, "incorrect_loss_uncond": -5.060258229573567}, "model_output": [{"sum_logits": -9.70688247680664, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -15.418344497680664, "logits_per_token": -4.85344123840332, "logits_per_char": -0.9706882476806641, "num_chars": 10}, {"sum_logits": -8.691415786743164, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.358104705810547, "logits_per_token": -8.691415786743164, "logits_per_char": -1.0864269733428955, "num_chars": 8}, {"sum_logits": -6.794574737548828, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -16.17658042907715, "logits_per_token": -3.397287368774414, "logits_per_char": -0.48532676696777344, "num_chars": 14}, {"sum_logits": -8.406651496887207, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.209275245666504, "logits_per_token": -8.406651496887207, "logits_per_char": -1.2009502138410295, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 181, "native_id": "8-69", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.25611877441406, "incorrect_loss_raw": 17.830395380655926, "correct_loss_per_char": 0.8783620198567709, "incorrect_loss_per_char": 0.6770919521496256, "correct_loss_per_token": 8.564029693603516, "incorrect_loss_per_token": 3.5868246290418835, "correct_loss_uncond": -6.957550048828125, "incorrect_loss_uncond": -9.472215016682943}, "model_output": [{"sum_logits": -22.251964569091797, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -30.035547256469727, "logits_per_token": -3.7086607615152993, "logits_per_char": -0.6953738927841187, "num_chars": 32}, {"sum_logits": -20.167564392089844, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -39.10397720336914, "logits_per_token": -3.361260732014974, "logits_per_char": -0.5450693078943201, "num_chars": 37}, {"sum_logits": -34.25611877441406, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -41.21366882324219, "logits_per_token": -8.564029693603516, "logits_per_char": -0.8783620198567709, "num_chars": 39}, {"sum_logits": -11.071657180786133, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.6905523935953775, "logits_per_char": -0.790832655770438, "num_chars": 14}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 182, "native_id": "9-159", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 27.9686279296875, "incorrect_loss_raw": 20.892263730367024, "correct_loss_per_char": 0.9988795689174107, "incorrect_loss_per_char": 1.020021485538743, "correct_loss_per_token": 5.5937255859375, "incorrect_loss_per_token": 4.528823407491048, "correct_loss_uncond": -17.360671997070312, "incorrect_loss_uncond": -7.548737525939941}, "model_output": [{"sum_logits": -27.376649856567383, "num_tokens": 5, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -31.585588455200195, "logits_per_token": -5.475329971313476, "logits_per_char": -1.1406937440236409, "num_chars": 24}, {"sum_logits": -14.277901649475098, "num_tokens": 5, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -28.98802375793457, "logits_per_token": -2.8555803298950195, "logits_per_char": -0.7514685078671104, "num_chars": 19}, {"sum_logits": -27.9686279296875, "num_tokens": 5, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -45.32929992675781, "logits_per_token": -5.5937255859375, "logits_per_char": -0.9988795689174107, "num_chars": 28}, {"sum_logits": -21.022239685058594, "num_tokens": 4, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -24.749391555786133, "logits_per_token": -5.255559921264648, "logits_per_char": -1.1679022047254775, "num_chars": 18}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 183, "native_id": "9-317", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.7106757164001465, "incorrect_loss_raw": 8.28845739364624, "correct_loss_per_char": 0.3925563097000122, "incorrect_loss_per_char": 1.3085121976004706, "correct_loss_per_token": 4.7106757164001465, "incorrect_loss_per_token": 6.510608990987142, "correct_loss_uncond": -6.7845635414123535, "incorrect_loss_uncond": -4.27387793858846}, "model_output": [{"sum_logits": -6.773099899291992, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.073330879211426, "logits_per_token": -6.773099899291992, "logits_per_char": -1.3546199798583984, "num_chars": 5}, {"sum_logits": -10.66709041595459, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -13.082062721252441, "logits_per_token": -5.333545207977295, "logits_per_char": -1.3333863019943237, "num_chars": 8}, {"sum_logits": -4.7106757164001465, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -11.4952392578125, "logits_per_token": -4.7106757164001465, "logits_per_char": -0.3925563097000122, "num_chars": 12}, {"sum_logits": -7.425181865692139, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -7.425181865692139, "logits_per_char": -1.2375303109486897, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 184, "native_id": "423", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.270133018493652, "incorrect_loss_raw": 8.549899101257324, "correct_loss_per_char": 1.3783555030822754, "incorrect_loss_per_char": 1.3519632778470478, "correct_loss_per_token": 8.270133018493652, "incorrect_loss_per_token": 8.549899101257324, "correct_loss_uncond": -4.608148574829102, "incorrect_loss_uncond": -3.4167114893595376}, "model_output": [{"sum_logits": -10.058989524841309, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -10.058989524841309, "logits_per_char": -1.676498254140218, "num_chars": 6}, {"sum_logits": -6.390199661254883, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -6.390199661254883, "logits_per_char": -1.0650332768758137, "num_chars": 6}, {"sum_logits": -9.200508117675781, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.479227066040039, "logits_per_token": -9.200508117675781, "logits_per_char": -1.3143583025251115, "num_chars": 7}, {"sum_logits": -8.270133018493652, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.878281593322754, "logits_per_token": -8.270133018493652, "logits_per_char": -1.3783555030822754, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 185, "native_id": "8-304", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.496170043945312, "incorrect_loss_raw": 19.69006093343099, "correct_loss_per_char": 0.5284620012555804, "incorrect_loss_per_char": 0.6445125410511071, "correct_loss_per_token": 2.0551300048828125, "incorrect_loss_per_token": 2.8567946751912436, "correct_loss_uncond": -13.577247619628906, "incorrect_loss_uncond": -10.764769236246744}, "model_output": [{"sum_logits": -18.496170043945312, "num_tokens": 9, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -32.07341766357422, "logits_per_token": -2.0551300048828125, "logits_per_char": -0.5284620012555804, "num_chars": 35}, {"sum_logits": -24.26665496826172, "num_tokens": 12, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -42.80366897583008, "logits_per_token": -2.022221247355143, "logits_per_char": -0.46666644169734073, "num_chars": 52}, {"sum_logits": -10.549064636230469, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.5163548787434897, "logits_per_char": -0.7535046168736049, "num_chars": 14}, {"sum_logits": -24.25446319580078, "num_tokens": 8, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -35.79251480102539, "logits_per_token": -3.0318078994750977, "logits_per_char": -0.7133665645823759, "num_chars": 34}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 186, "native_id": "785", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.095746040344238, "incorrect_loss_raw": 23.361820856730144, "correct_loss_per_char": 0.4515774496670427, "incorrect_loss_per_char": 0.9583791533704558, "correct_loss_per_token": 2.6191492080688477, "incorrect_loss_per_token": 4.192236614227295, "correct_loss_uncond": -16.847182273864746, "incorrect_loss_uncond": -7.847576141357422}, "model_output": [{"sum_logits": -23.989532470703125, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -29.62256622314453, "logits_per_token": -4.797906494140625, "logits_per_char": -1.1994766235351562, "num_chars": 20}, {"sum_logits": -26.965290069580078, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -34.74879455566406, "logits_per_token": -2.996143341064453, "logits_per_char": -0.6128475015813654, "num_chars": 44}, {"sum_logits": -19.130640029907227, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -29.2568302154541, "logits_per_token": -4.782660007476807, "logits_per_char": -1.0628133349948459, "num_chars": 18}, {"sum_logits": -13.095746040344238, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -29.942928314208984, "logits_per_token": -2.6191492080688477, "logits_per_char": -0.4515774496670427, "num_chars": 29}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 187, "native_id": "9-1087", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.5029183626174927, "incorrect_loss_raw": 6.957307457923889, "correct_loss_per_char": 0.16699092917972141, "incorrect_loss_per_char": 0.4686275914269669, "correct_loss_per_token": 1.5029183626174927, "incorrect_loss_per_token": 2.7228068908055625, "correct_loss_uncond": -10.912653803825378, "incorrect_loss_uncond": -10.800363262494406}, "model_output": [{"sum_logits": -6.5952467918396, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -17.578475952148438, "logits_per_token": -2.1984155972798667, "logits_per_char": -0.4710890565599714, "num_chars": 14}, {"sum_logits": -12.460005760192871, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -23.910310745239258, "logits_per_token": -4.153335253397624, "logits_per_char": -0.732941515305463, "num_chars": 17}, {"sum_logits": -1.5029183626174927, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": true, "sum_logits_uncond": -12.415572166442871, "logits_per_token": -1.5029183626174927, "logits_per_char": -0.16699092917972141, "num_chars": 9}, {"sum_logits": -1.8166698217391968, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.784225463867188, "logits_per_token": -1.8166698217391968, "logits_per_char": -0.2018522024154663, "num_chars": 9}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 188, "native_id": "485", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.0006103515625, "incorrect_loss_raw": 15.431018511454264, "correct_loss_per_char": 1.2500381469726562, "incorrect_loss_per_char": 0.9573327930749805, "correct_loss_per_token": 5.000152587890625, "incorrect_loss_per_token": 5.13969612121582, "correct_loss_uncond": -8.05965805053711, "incorrect_loss_uncond": -5.365731239318848}, "model_output": [{"sum_logits": -16.459060668945312, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.36465072631836, "logits_per_token": -8.229530334472656, "logits_per_char": -1.097270711263021, "num_chars": 15}, {"sum_logits": -13.303353309631348, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -21.85127067565918, "logits_per_token": -4.434451103210449, "logits_per_char": -1.023334869971642, "num_chars": 13}, {"sum_logits": -16.530641555786133, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -21.174327850341797, "logits_per_token": -2.7551069259643555, "logits_per_char": -0.7513927979902788, "num_chars": 22}, {"sum_logits": -20.0006103515625, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -28.06026840209961, "logits_per_token": -5.000152587890625, "logits_per_char": -1.2500381469726562, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 189, "native_id": "9-908", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0553412437438965, "incorrect_loss_raw": 14.459960301717123, "correct_loss_per_char": 0.3394823604159885, "incorrect_loss_per_char": 1.175469844661958, "correct_loss_per_token": 3.0553412437438965, "incorrect_loss_per_token": 7.2299801508585615, "correct_loss_uncond": -11.352184772491455, "incorrect_loss_uncond": -4.58775266011556}, "model_output": [{"sum_logits": -14.520454406738281, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -18.67514419555664, "logits_per_token": -7.260227203369141, "logits_per_char": -1.2100378672281902, "num_chars": 12}, {"sum_logits": -17.840055465698242, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -21.429500579833984, "logits_per_token": -8.920027732849121, "logits_per_char": -0.9389502876683286, "num_chars": 19}, {"sum_logits": -3.0553412437438965, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.407526016235352, "logits_per_token": -3.0553412437438965, "logits_per_char": -0.3394823604159885, "num_chars": 9}, {"sum_logits": -11.019371032714844, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.038494110107422, "logits_per_token": -5.509685516357422, "logits_per_char": -1.3774213790893555, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 190, "native_id": "1231", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.232733726501465, "incorrect_loss_raw": 9.717583020528158, "correct_loss_per_char": 1.021157611500133, "incorrect_loss_per_char": 1.1338309049606323, "correct_loss_per_token": 5.616366863250732, "incorrect_loss_per_token": 7.770002524058024, "correct_loss_uncond": -5.426569938659668, "incorrect_loss_uncond": -6.240963300069173}, "model_output": [{"sum_logits": -8.599916458129883, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -14.57796859741211, "logits_per_token": -8.599916458129883, "logits_per_char": -0.9555462731255425, "num_chars": 9}, {"sum_logits": -8.867349624633789, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -14.793773651123047, "logits_per_token": -8.867349624633789, "logits_per_char": -0.9852610694037544, "num_chars": 9}, {"sum_logits": -11.232733726501465, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -16.659303665161133, "logits_per_token": -5.616366863250732, "logits_per_char": -1.021157611500133, "num_chars": 11}, {"sum_logits": -11.6854829788208, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -18.503896713256836, "logits_per_token": -5.8427414894104, "logits_per_char": -1.4606853723526, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 191, "native_id": "810", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 25.801136016845703, "incorrect_loss_raw": 25.501540184020996, "correct_loss_per_char": 0.6143127623058501, "incorrect_loss_per_char": 0.7610341550005438, "correct_loss_per_token": 4.30018933614095, "incorrect_loss_per_token": 4.682867765426636, "correct_loss_uncond": -11.025203704833984, "incorrect_loss_uncond": -8.486976941426596}, "model_output": [{"sum_logits": -36.76012420654297, "num_tokens": 6, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -44.891937255859375, "logits_per_token": -6.126687367757161, "logits_per_char": -0.9935168704471072, "num_chars": 37}, {"sum_logits": -25.801136016845703, "num_tokens": 6, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -36.82633972167969, "logits_per_token": -4.30018933614095, "logits_per_char": -0.6143127623058501, "num_chars": 42}, {"sum_logits": -15.57399845123291, "num_tokens": 4, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -23.696943283081055, "logits_per_token": -3.8934996128082275, "logits_per_char": -0.598999940432035, "num_chars": 26}, {"sum_logits": -24.17049789428711, "num_tokens": 6, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -33.376670837402344, "logits_per_token": -4.0284163157145185, "logits_per_char": -0.6905856541224888, "num_chars": 35}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 192, "native_id": "158", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.225593566894531, "incorrect_loss_raw": 5.443796952565511, "correct_loss_per_char": 1.4451187133789063, "incorrect_loss_per_char": 0.6281107840141233, "correct_loss_per_token": 7.225593566894531, "incorrect_loss_per_token": 3.372188064787123, "correct_loss_uncond": -5.9822540283203125, "incorrect_loss_uncond": -8.78029171625773}, "model_output": [{"sum_logits": -4.927052974700928, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.394623756408691, "logits_per_token": -2.463526487350464, "logits_per_char": -0.5474503305223253, "num_chars": 9}, {"sum_logits": -5.626950263977051, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.182703018188477, "logits_per_token": -1.8756500879923503, "logits_per_char": -0.5115409330888228, "num_chars": 11}, {"sum_logits": -5.777387619018555, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.094939231872559, "logits_per_token": -5.777387619018555, "logits_per_char": -0.8253410884312221, "num_chars": 7}, {"sum_logits": -7.225593566894531, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.207847595214844, "logits_per_token": -7.225593566894531, "logits_per_char": -1.4451187133789063, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 193, "native_id": "7-445", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.937356948852539, "incorrect_loss_raw": 17.289235432942707, "correct_loss_per_char": 0.3141409723382247, "incorrect_loss_per_char": 0.5593997917299263, "correct_loss_per_token": 1.4921696186065674, "incorrect_loss_per_token": 3.2938658820258246, "correct_loss_uncond": -12.067264556884766, "incorrect_loss_uncond": -8.521427790323893}, "model_output": [{"sum_logits": -11.937356948852539, "num_tokens": 8, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -24.004621505737305, "logits_per_token": -1.4921696186065674, "logits_per_char": -0.3141409723382247, "num_chars": 38}, {"sum_logits": -15.958566665649414, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.679811477661133, "logits_per_token": -3.191713333129883, "logits_per_char": -0.5147924730854649, "num_chars": 31}, {"sum_logits": -21.15083122253418, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -27.025253295898438, "logits_per_token": -4.230166244506836, "logits_per_char": -0.7293390076735924, "num_chars": 29}, {"sum_logits": -14.758308410644531, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -25.726924896240234, "logits_per_token": -2.4597180684407554, "logits_per_char": -0.4340678944307215, "num_chars": 34}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 194, "native_id": "1502", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 36.93046569824219, "incorrect_loss_raw": 23.801263173421223, "correct_loss_per_char": 0.647902906986705, "incorrect_loss_per_char": 0.7318789094302982, "correct_loss_per_token": 3.077538808186849, "incorrect_loss_per_token": 3.516961750656209, "correct_loss_uncond": -10.270153045654297, "incorrect_loss_uncond": -11.621781667073568}, "model_output": [{"sum_logits": -20.575546264648438, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -32.9932746887207, "logits_per_token": -4.115109252929687, "logits_per_char": -0.7348409380231585, "num_chars": 28}, {"sum_logits": -24.828092575073242, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -32.284271240234375, "logits_per_token": -3.546870367867606, "logits_per_char": -0.8561411232783877, "num_chars": 29}, {"sum_logits": -36.93046569824219, "num_tokens": 12, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -47.200618743896484, "logits_per_token": -3.077538808186849, "logits_per_char": -0.647902906986705, "num_chars": 57}, {"sum_logits": -26.000150680541992, "num_tokens": 9, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -40.9915885925293, "logits_per_token": -2.8889056311713324, "logits_per_char": -0.6046546669893487, "num_chars": 43}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 195, "native_id": "1200", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.974274635314941, "incorrect_loss_raw": 11.787014325459799, "correct_loss_per_char": 1.441586070590549, "incorrect_loss_per_char": 1.4733767906824748, "correct_loss_per_token": 4.324758211771647, "incorrect_loss_per_token": 3.9290047751532655, "correct_loss_uncond": -4.578888893127441, "incorrect_loss_uncond": -6.202763557434082}, "model_output": [{"sum_logits": -14.50958251953125, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.553813934326172, "logits_per_token": -4.836527506510417, "logits_per_char": -1.8136978149414062, "num_chars": 8}, {"sum_logits": -12.974274635314941, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -17.553163528442383, "logits_per_token": -4.324758211771647, "logits_per_char": -1.441586070590549, "num_chars": 9}, {"sum_logits": -8.601869583129883, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.908740997314453, "logits_per_token": -2.8672898610432944, "logits_per_char": -1.0752336978912354, "num_chars": 8}, {"sum_logits": -12.249590873718262, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.506778717041016, "logits_per_token": -4.083196957906087, "logits_per_char": -1.5311988592147827, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 196, "native_id": "437", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 39.50229263305664, "incorrect_loss_raw": 20.090415954589844, "correct_loss_per_char": 2.1945718129475913, "incorrect_loss_per_char": 1.501623625225491, "correct_loss_per_token": 6.583715438842773, "incorrect_loss_per_token": 5.49824456108941, "correct_loss_uncond": -1.7473487854003906, "incorrect_loss_uncond": -4.031070709228516}, "model_output": [{"sum_logits": -39.50229263305664, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -41.24964141845703, "logits_per_token": -6.583715438842773, "logits_per_char": -2.1945718129475913, "num_chars": 18}, {"sum_logits": -15.329048156738281, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -18.181838989257812, "logits_per_token": -5.109682718912761, "logits_per_char": -1.021936543782552, "num_chars": 15}, {"sum_logits": -26.96761703491211, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -32.854957580566406, "logits_per_token": -5.393523406982422, "logits_per_char": -1.6854760646820068, "num_chars": 16}, {"sum_logits": -17.97458267211914, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -21.32766342163086, "logits_per_token": -5.991527557373047, "logits_per_char": -1.7974582672119142, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 197, "native_id": "8-205", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.326285362243652, "incorrect_loss_raw": 21.469915707906086, "correct_loss_per_char": 0.5393469220116025, "incorrect_loss_per_char": 0.8952408026837041, "correct_loss_per_token": 2.2652570724487306, "incorrect_loss_per_token": 3.9824480056762694, "correct_loss_uncond": -10.30473804473877, "incorrect_loss_uncond": -1.5105695724487305}, "model_output": [{"sum_logits": -11.326285362243652, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -21.631023406982422, "logits_per_token": -2.2652570724487306, "logits_per_char": -0.5393469220116025, "num_chars": 21}, {"sum_logits": -28.038162231445312, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -24.40603256225586, "logits_per_token": -4.673027038574219, "logits_per_char": -1.078390855055589, "num_chars": 26}, {"sum_logits": -23.23668670654297, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -24.219505310058594, "logits_per_token": -4.647337341308594, "logits_per_char": -1.0102907263714334, "num_chars": 23}, {"sum_logits": -13.13489818572998, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.31591796875, "logits_per_token": -2.626979637145996, "logits_per_char": -0.59704082662409, "num_chars": 22}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 198, "native_id": "9-270", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.722781658172607, "incorrect_loss_raw": 8.57012414932251, "correct_loss_per_char": 0.5516272612980434, "incorrect_loss_per_char": 0.6269861930455917, "correct_loss_per_token": 2.5742605527242026, "incorrect_loss_per_token": 3.6632523271772595, "correct_loss_uncond": -15.152334690093994, "incorrect_loss_uncond": -12.980406602223715}, "model_output": [{"sum_logits": -9.434331893920898, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -21.371540069580078, "logits_per_token": -4.717165946960449, "logits_per_char": -0.6289554595947265, "num_chars": 15}, {"sum_logits": -5.083465099334717, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -18.37354850769043, "logits_per_token": -2.5417325496673584, "logits_per_char": -0.3910357768719013, "num_chars": 13}, {"sum_logits": -7.722781658172607, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -22.8751163482666, "logits_per_token": -2.5742605527242026, "logits_per_char": -0.5516272612980434, "num_chars": 14}, {"sum_logits": -11.192575454711914, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -24.906503677368164, "logits_per_token": -3.730858484903971, "logits_per_char": -0.8609673426701472, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 199, "native_id": "8-130", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.819320678710938, "incorrect_loss_raw": 7.928935368855794, "correct_loss_per_char": 0.9849433898925781, "incorrect_loss_per_char": 1.3043866316477457, "correct_loss_per_token": 3.9397735595703125, "incorrect_loss_per_token": 7.928935368855794, "correct_loss_uncond": -4.056092262268066, "incorrect_loss_uncond": -4.353801409403483}, "model_output": [{"sum_logits": -8.26546573638916, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.723397254943848, "logits_per_token": -8.26546573638916, "logits_per_char": -1.653093147277832, "num_chars": 5}, {"sum_logits": -7.677580833435059, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -7.677580833435059, "logits_per_char": -1.2795968055725098, "num_chars": 6}, {"sum_logits": -11.819320678710938, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.875412940979004, "logits_per_token": -3.9397735595703125, "logits_per_char": -0.9849433898925781, "num_chars": 12}, {"sum_logits": -7.843759536743164, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.59320068359375, "logits_per_token": -7.843759536743164, "logits_per_char": -0.9804699420928955, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 200, "native_id": "229", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.705219268798828, "incorrect_loss_raw": 14.704486211140951, "correct_loss_per_char": 0.7136812845865885, "incorrect_loss_per_char": 0.8207417297363282, "correct_loss_per_token": 2.676304817199707, "incorrect_loss_per_token": 5.951841460333931, "correct_loss_uncond": -6.71649169921875, "incorrect_loss_uncond": -7.791841506958008}, "model_output": [{"sum_logits": -25.207229614257812, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -31.803756713867188, "logits_per_token": -8.402409871419271, "logits_per_char": -1.0082891845703126, "num_chars": 25}, {"sum_logits": -5.477495193481445, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.849370956420898, "logits_per_token": -2.7387475967407227, "logits_per_char": -0.7824993133544922, "num_chars": 7}, {"sum_logits": -13.428733825683594, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.83585548400879, "logits_per_token": -6.714366912841797, "logits_per_char": -0.6714366912841797, "num_chars": 20}, {"sum_logits": -10.705219268798828, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -17.421710968017578, "logits_per_token": -2.676304817199707, "logits_per_char": -0.7136812845865885, "num_chars": 15}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 201, "native_id": "9-390", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.851330757141113, "incorrect_loss_raw": 6.36570421854655, "correct_loss_per_char": 0.9702661514282227, "incorrect_loss_per_char": 1.2443093723720975, "correct_loss_per_token": 4.851330757141113, "incorrect_loss_per_token": 6.36570421854655, "correct_loss_uncond": -5.945230484008789, "incorrect_loss_uncond": -3.7442169189453125}, "model_output": [{"sum_logits": -5.681941986083984, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -10.519886016845703, "logits_per_token": -5.681941986083984, "logits_per_char": -0.9469903310139974, "num_chars": 6}, {"sum_logits": -6.600912094116211, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -9.8740234375, "logits_per_token": -6.600912094116211, "logits_per_char": -1.6502280235290527, "num_chars": 4}, {"sum_logits": -6.814258575439453, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -9.935853958129883, "logits_per_token": -6.814258575439453, "logits_per_char": -1.1357097625732422, "num_chars": 6}, {"sum_logits": -4.851330757141113, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -10.796561241149902, "logits_per_token": -4.851330757141113, "logits_per_char": -0.9702661514282227, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 202, "native_id": "8-107", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.31073760986328, "incorrect_loss_raw": 16.836398442586262, "correct_loss_per_char": 0.5081773055227179, "incorrect_loss_per_char": 0.7442205104660079, "correct_loss_per_token": 4.82768440246582, "incorrect_loss_per_token": 3.491078976222447, "correct_loss_uncond": -15.21197509765625, "incorrect_loss_uncond": -13.719270706176758}, "model_output": [{"sum_logits": -21.278545379638672, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -30.758100509643555, "logits_per_token": -5.319636344909668, "logits_per_char": -1.0639272689819337, "num_chars": 20}, {"sum_logits": -17.111385345458984, "num_tokens": 5, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -34.26408386230469, "logits_per_token": -3.422277069091797, "logits_per_char": -0.7777902429754083, "num_chars": 22}, {"sum_logits": -19.31073760986328, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -34.52271270751953, "logits_per_token": -4.82768440246582, "logits_per_char": -0.5081773055227179, "num_chars": 38}, {"sum_logits": -12.119264602661133, "num_tokens": 7, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -26.64482307434082, "logits_per_token": -1.731323514665876, "logits_per_char": -0.3909440194406817, "num_chars": 31}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 203, "native_id": "7-527", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.880210876464844, "incorrect_loss_raw": 15.82859738667806, "correct_loss_per_char": 0.8352084350585938, "incorrect_loss_per_char": 0.541471992964032, "correct_loss_per_token": 5.220052719116211, "incorrect_loss_per_token": 3.2276614824930827, "correct_loss_uncond": -14.705852508544922, "incorrect_loss_uncond": -15.192279179890951}, "model_output": [{"sum_logits": -20.880210876464844, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -35.586063385009766, "logits_per_token": -5.220052719116211, "logits_per_char": -0.8352084350585938, "num_chars": 25}, {"sum_logits": -21.868404388427734, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -33.33327102661133, "logits_per_token": -4.373680877685547, "logits_per_char": -0.6833876371383667, "num_chars": 32}, {"sum_logits": -21.900867462158203, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -35.21387481689453, "logits_per_token": -4.3801734924316404, "logits_per_char": -0.7552023262813173, "num_chars": 29}, {"sum_logits": -3.716520309448242, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.515483856201172, "logits_per_token": -0.9291300773620605, "logits_per_char": -0.18582601547241212, "num_chars": 20}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 204, "native_id": "7-333", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 26.37710952758789, "incorrect_loss_raw": 17.052589734395344, "correct_loss_per_char": 0.7536317007882255, "incorrect_loss_per_char": 0.6681340155098852, "correct_loss_per_token": 5.275421905517578, "incorrect_loss_per_token": 3.49028197394477, "correct_loss_uncond": -21.471332550048828, "incorrect_loss_uncond": -11.128442446390787}, "model_output": [{"sum_logits": -15.647503852844238, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -27.05678939819336, "logits_per_token": -3.1295007705688476, "logits_per_char": -0.5795371797349718, "num_chars": 27}, {"sum_logits": -18.434654235839844, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -21.96516990661621, "logits_per_token": -3.072442372639974, "logits_per_char": -0.7681105931599935, "num_chars": 24}, {"sum_logits": -26.37710952758789, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -47.84844207763672, "logits_per_token": -5.275421905517578, "logits_per_char": -0.7536317007882255, "num_chars": 35}, {"sum_logits": -17.075611114501953, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -35.52113723754883, "logits_per_token": -4.268902778625488, "logits_per_char": -0.6567542736346905, "num_chars": 26}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 205, "native_id": "9-44", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.38214111328125, "incorrect_loss_raw": 15.958872477213541, "correct_loss_per_char": 1.0347401012073865, "incorrect_loss_per_char": 1.1694696445657748, "correct_loss_per_token": 3.7940470377604165, "incorrect_loss_per_token": 6.04524474673801, "correct_loss_uncond": -7.848222732543945, "incorrect_loss_uncond": -8.311307271321615}, "model_output": [{"sum_logits": -13.06117057800293, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.60626220703125, "logits_per_token": -6.530585289001465, "logits_per_char": -1.1873791434548118, "num_chars": 11}, {"sum_logits": -16.04696273803711, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -25.759525299072266, "logits_per_token": -5.348987579345703, "logits_per_char": -1.0697975158691406, "num_chars": 15}, {"sum_logits": -11.38214111328125, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.230363845825195, "logits_per_token": -3.7940470377604165, "logits_per_char": -1.0347401012073865, "num_chars": 11}, {"sum_logits": -18.768484115600586, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -26.444751739501953, "logits_per_token": -6.256161371866862, "logits_per_char": -1.2512322743733724, "num_chars": 15}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 206, "native_id": "7-160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 21.52983283996582, "incorrect_loss_raw": 15.646263122558594, "correct_loss_per_char": 1.1961018244425456, "incorrect_loss_per_char": 1.2161358731054204, "correct_loss_per_token": 7.176610946655273, "incorrect_loss_per_token": 7.056034511990017, "correct_loss_uncond": -9.203390121459961, "incorrect_loss_uncond": -3.4548606872558594}, "model_output": [{"sum_logits": -13.807746887207031, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.468456268310547, "logits_per_token": -4.602582295735677, "logits_per_char": -0.8629841804504395, "num_chars": 16}, {"sum_logits": -16.194000244140625, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -19.486576080322266, "logits_per_token": -8.097000122070312, "logits_per_char": -1.2456923264723558, "num_chars": 13}, {"sum_logits": -16.937042236328125, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -19.348339080810547, "logits_per_token": -8.468521118164062, "logits_per_char": -1.5397311123934658, "num_chars": 11}, {"sum_logits": -21.52983283996582, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.73322296142578, "logits_per_token": -7.176610946655273, "logits_per_char": -1.1961018244425456, "num_chars": 18}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 207, "native_id": "1942", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 35.97999572753906, "incorrect_loss_raw": 25.56779607137044, "correct_loss_per_char": 0.6541817405007102, "incorrect_loss_per_char": 0.6667881803115, "correct_loss_per_token": 4.497499465942383, "incorrect_loss_per_token": 3.475915681748163, "correct_loss_uncond": -10.441818237304688, "incorrect_loss_uncond": -12.970209757486979}, "model_output": [{"sum_logits": -29.67327117919922, "num_tokens": 8, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -42.45783996582031, "logits_per_token": -3.7091588973999023, "logits_per_char": -0.6055769628408004, "num_chars": 49}, {"sum_logits": -22.474788665771484, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -35.611209869384766, "logits_per_token": -3.2106840951102122, "logits_per_char": -0.5762766324556791, "num_chars": 39}, {"sum_logits": -24.555328369140625, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -37.54496765136719, "logits_per_token": -3.507904052734375, "logits_per_char": -0.8185109456380208, "num_chars": 30}, {"sum_logits": -35.97999572753906, "num_tokens": 8, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -46.42181396484375, "logits_per_token": -4.497499465942383, "logits_per_char": -0.6541817405007102, "num_chars": 55}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 208, "native_id": "9-597", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.964744567871094, "incorrect_loss_raw": 10.27417246500651, "correct_loss_per_char": 0.996794960715554, "incorrect_loss_per_char": 1.5581956466039022, "correct_loss_per_token": 5.482372283935547, "incorrect_loss_per_token": 10.27417246500651, "correct_loss_uncond": -5.831718444824219, "incorrect_loss_uncond": -3.417123476664225}, "model_output": [{"sum_logits": -12.977916717529297, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -12.70651912689209, "logits_per_token": -12.977916717529297, "logits_per_char": -1.622239589691162, "num_chars": 8}, {"sum_logits": -10.964744567871094, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.796463012695312, "logits_per_token": -5.482372283935547, "logits_per_char": -0.996794960715554, "num_chars": 11}, {"sum_logits": -10.956963539123535, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.183154106140137, "logits_per_token": -10.956963539123535, "logits_per_char": -2.191392707824707, "num_chars": 5}, {"sum_logits": -6.887637138366699, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -15.18421459197998, "logits_per_token": -6.887637138366699, "logits_per_char": -0.8609546422958374, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 209, "native_id": "9-35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.3411784172058105, "incorrect_loss_raw": 8.179455121358236, "correct_loss_per_char": 0.24117657873365614, "incorrect_loss_per_char": 1.0702815232453524, "correct_loss_per_token": 1.4470594724019368, "incorrect_loss_per_token": 4.089727560679118, "correct_loss_uncond": -10.954684734344482, "incorrect_loss_uncond": -5.664031982421875}, "model_output": [{"sum_logits": -4.3411784172058105, "num_tokens": 3, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -15.295863151550293, "logits_per_token": -1.4470594724019368, "logits_per_char": -0.24117657873365614, "num_chars": 18}, {"sum_logits": -6.7160491943359375, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -13.60191535949707, "logits_per_token": -3.3580245971679688, "logits_per_char": -1.119341532389323, "num_chars": 6}, {"sum_logits": -8.009689331054688, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -13.531503677368164, "logits_per_token": -4.004844665527344, "logits_per_char": -1.001211166381836, "num_chars": 8}, {"sum_logits": -9.812626838684082, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -14.397042274475098, "logits_per_token": -4.906313419342041, "logits_per_char": -1.090291870964898, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 210, "native_id": "1161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 37.36616516113281, "incorrect_loss_raw": 29.238835016886394, "correct_loss_per_char": 0.9833201358192846, "incorrect_loss_per_char": 1.0078226962765515, "correct_loss_per_token": 4.151796129014757, "incorrect_loss_per_token": 5.847767003377278, "correct_loss_uncond": -12.13818359375, "incorrect_loss_uncond": -11.806193669637045}, "model_output": [{"sum_logits": -23.3356876373291, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -32.96669006347656, "logits_per_token": -4.667137527465821, "logits_per_char": -0.8334174156188965, "num_chars": 28}, {"sum_logits": -37.36616516113281, "num_tokens": 9, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -49.50434875488281, "logits_per_token": -4.151796129014757, "logits_per_char": -0.9833201358192846, "num_chars": 38}, {"sum_logits": -38.30038070678711, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -50.152587890625, "logits_per_token": -7.660076141357422, "logits_per_char": -1.320702782992659, "num_chars": 29}, {"sum_logits": -26.08043670654297, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -40.01580810546875, "logits_per_token": -5.216087341308594, "logits_per_char": -0.869347890218099, "num_chars": 30}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 211, "native_id": "7-171", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.28568458557129, "incorrect_loss_raw": 10.858925978342691, "correct_loss_per_char": 0.8156672446958481, "incorrect_loss_per_char": 0.42751462167484466, "correct_loss_per_token": 4.214280764261882, "incorrect_loss_per_token": 2.3699244340260823, "correct_loss_uncond": -17.125146865844727, "incorrect_loss_uncond": -16.501601060231526}, "model_output": [{"sum_logits": -6.653195858001709, "num_tokens": 5, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -27.51702117919922, "logits_per_token": -1.3306391716003418, "logits_per_char": -0.3024179935455322, "num_chars": 22}, {"sum_logits": -25.28568458557129, "num_tokens": 6, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -42.410831451416016, "logits_per_token": -4.214280764261882, "logits_per_char": -0.8156672446958481, "num_chars": 31}, {"sum_logits": -14.03522777557373, "num_tokens": 5, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -28.863937377929688, "logits_per_token": -2.807045555114746, "logits_per_char": -0.5398164529066819, "num_chars": 26}, {"sum_logits": -11.888354301452637, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -25.70062255859375, "logits_per_token": -2.972088575363159, "logits_per_char": -0.44030941857231987, "num_chars": 27}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 212, "native_id": "1139", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.907837867736816, "incorrect_loss_raw": 14.93211555480957, "correct_loss_per_char": 0.806739866733551, "incorrect_loss_per_char": 1.2353460523817275, "correct_loss_per_token": 2.1513063112894693, "incorrect_loss_per_token": 6.310299873352051, "correct_loss_uncond": -10.647488594055176, "incorrect_loss_uncond": -4.613409678141276}, "model_output": [{"sum_logits": -20.80364227294922, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -23.577110290527344, "logits_per_token": -6.934547424316406, "logits_per_char": -1.040182113647461, "num_chars": 20}, {"sum_logits": -12.500080108642578, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -16.857107162475586, "logits_per_token": -6.250040054321289, "logits_per_char": -1.3888977898491754, "num_chars": 9}, {"sum_logits": -11.492624282836914, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -18.20235824584961, "logits_per_token": -5.746312141418457, "logits_per_char": -1.276958253648546, "num_chars": 9}, {"sum_logits": -12.907837867736816, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -23.555326461791992, "logits_per_token": -2.1513063112894693, "logits_per_char": -0.806739866733551, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 213, "native_id": "1924", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.540848731994629, "incorrect_loss_raw": 17.08389155069987, "correct_loss_per_char": 1.2117373943328857, "incorrect_loss_per_char": 0.9328640381495158, "correct_loss_per_token": 4.846949577331543, "incorrect_loss_per_token": 6.651518662770589, "correct_loss_uncond": -8.321223258972168, "incorrect_loss_uncond": -6.736488978068034}, "model_output": [{"sum_logits": -11.131967544555664, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -18.125995635986328, "logits_per_token": -5.565983772277832, "logits_per_char": -0.695747971534729, "num_chars": 16}, {"sum_logits": -14.540848731994629, "num_tokens": 3, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -22.862071990966797, "logits_per_token": -4.846949577331543, "logits_per_char": -1.2117373943328857, "num_chars": 12}, {"sum_logits": -22.68512535095215, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -30.221817016601562, "logits_per_token": -5.671281337738037, "logits_per_char": -1.1342562675476073, "num_chars": 20}, {"sum_logits": -17.434581756591797, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -23.11332893371582, "logits_per_token": -8.717290878295898, "logits_per_char": -0.9685878753662109, "num_chars": 18}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 214, "native_id": "9-440", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.548750400543213, "incorrect_loss_raw": 13.355090141296387, "correct_loss_per_char": 0.4623958667119344, "incorrect_loss_per_char": 1.0128137296081607, "correct_loss_per_token": 2.7743752002716064, "incorrect_loss_per_token": 4.451696713765463, "correct_loss_uncond": -8.137412548065186, "incorrect_loss_uncond": -5.486540794372559}, "model_output": [{"sum_logits": -14.157903671264648, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -21.413646697998047, "logits_per_token": -4.719301223754883, "logits_per_char": -1.0890695131742036, "num_chars": 13}, {"sum_logits": -5.548750400543213, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -13.686162948608398, "logits_per_token": -2.7743752002716064, "logits_per_char": -0.4623958667119344, "num_chars": 12}, {"sum_logits": -15.847740173339844, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -18.146137237548828, "logits_per_token": -5.282580057779948, "logits_per_char": -1.320645014444987, "num_chars": 12}, {"sum_logits": -10.059626579284668, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -16.96510887145996, "logits_per_token": -3.353208859761556, "logits_per_char": -0.6287266612052917, "num_chars": 16}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 215, "native_id": "9-528", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.422616481781006, "incorrect_loss_raw": 3.7581287225087485, "correct_loss_per_char": 0.24226164817810059, "incorrect_loss_per_char": 0.3641993681589762, "correct_loss_per_token": 1.211308240890503, "incorrect_loss_per_token": 1.8790643612543743, "correct_loss_uncond": -10.434200763702393, "incorrect_loss_uncond": -11.192617177963257}, "model_output": [{"sum_logits": -4.1243109703063965, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -15.306095123291016, "logits_per_token": -2.0621554851531982, "logits_per_char": -0.4124310970306396, "num_chars": 10}, {"sum_logits": -3.832456350326538, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -15.186735153198242, "logits_per_token": -1.916228175163269, "logits_per_char": -0.348405122756958, "num_chars": 11}, {"sum_logits": -2.422616481781006, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": true, "sum_logits_uncond": -12.856817245483398, "logits_per_token": -1.211308240890503, "logits_per_char": -0.24226164817810059, "num_chars": 10}, {"sum_logits": -3.3176188468933105, "num_tokens": 2, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -14.359407424926758, "logits_per_token": -1.6588094234466553, "logits_per_char": -0.33176188468933104, "num_chars": 10}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 216, "native_id": "170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.12051773071289, "incorrect_loss_raw": 12.353519439697266, "correct_loss_per_char": 1.0075323581695557, "incorrect_loss_per_char": 0.7047979541313953, "correct_loss_per_token": 4.030129432678223, "incorrect_loss_per_token": 2.6333527406056723, "correct_loss_uncond": -7.074701309204102, "incorrect_loss_uncond": -11.686267217000326}, "model_output": [{"sum_logits": -9.758931159973145, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -21.500383377075195, "logits_per_token": -2.439732789993286, "logits_per_char": -0.6099331974983215, "num_chars": 16}, {"sum_logits": -16.110477447509766, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -27.935710906982422, "logits_per_token": -3.222095489501953, "logits_per_char": -1.0740318298339844, "num_chars": 15}, {"sum_logits": -16.12051773071289, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -23.195219039916992, "logits_per_token": -4.030129432678223, "logits_per_char": -1.0075323581695557, "num_chars": 16}, {"sum_logits": -11.191149711608887, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -22.683265686035156, "logits_per_token": -2.2382299423217775, "logits_per_char": -0.43042883506188023, "num_chars": 26}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 217, "native_id": "395", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.310515403747559, "incorrect_loss_raw": 8.333603700002035, "correct_loss_per_char": 1.5517525672912598, "incorrect_loss_per_char": 1.433148165354653, "correct_loss_per_token": 4.655257701873779, "incorrect_loss_per_token": 6.65567668279012, "correct_loss_uncond": -5.937310218811035, "incorrect_loss_uncond": -5.5848565101623535}, "model_output": [{"sum_logits": -10.067562103271484, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.651657104492188, "logits_per_token": -5.033781051635742, "logits_per_char": -2.0135124206542967, "num_chars": 5}, {"sum_logits": -6.409653186798096, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.718417167663574, "logits_per_token": -6.409653186798096, "logits_per_char": -1.0682755311330159, "num_chars": 6}, {"sum_logits": -8.523595809936523, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.385306358337402, "logits_per_token": -8.523595809936523, "logits_per_char": -1.2176565442766463, "num_chars": 7}, {"sum_logits": -9.310515403747559, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.247825622558594, "logits_per_token": -4.655257701873779, "logits_per_char": -1.5517525672912598, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 218, "native_id": "9-633", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.76956844329834, "incorrect_loss_raw": 13.558068116505941, "correct_loss_per_char": 0.6307827377319336, "incorrect_loss_per_char": 1.5184289483166247, "correct_loss_per_token": 5.25652281443278, "incorrect_loss_per_token": 6.779034058252971, "correct_loss_uncond": -12.446112632751465, "incorrect_loss_uncond": -0.6084504127502441}, "model_output": [{"sum_logits": -17.571571350097656, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.845720291137695, "logits_per_token": -8.785785675048828, "logits_per_char": -1.7571571350097657, "num_chars": 10}, {"sum_logits": -15.820762634277344, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.91702651977539, "logits_per_token": -7.910381317138672, "logits_per_char": -1.757862514919705, "num_chars": 9}, {"sum_logits": -7.281870365142822, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.736808776855469, "logits_per_token": -3.640935182571411, "logits_per_char": -1.0402671950204032, "num_chars": 7}, {"sum_logits": -15.76956844329834, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -28.215681076049805, "logits_per_token": -5.25652281443278, "logits_per_char": -0.6307827377319336, "num_chars": 25}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 219, "native_id": "9-504", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.947868347167969, "incorrect_loss_raw": 9.272907574971518, "correct_loss_per_char": 1.1053187052408855, "incorrect_loss_per_char": 1.2258682127352114, "correct_loss_per_token": 3.3159561157226562, "incorrect_loss_per_token": 9.272907574971518, "correct_loss_uncond": -7.944517135620117, "incorrect_loss_uncond": -5.20982551574707}, "model_output": [{"sum_logits": -9.947868347167969, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.892385482788086, "logits_per_token": -3.3159561157226562, "logits_per_char": -1.1053187052408855, "num_chars": 9}, {"sum_logits": -14.444765090942383, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.909667015075684, "logits_per_token": -14.444765090942383, "logits_per_char": -1.604973898993598, "num_chars": 9}, {"sum_logits": -8.028810501098633, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.607885360717773, "logits_per_token": -8.028810501098633, "logits_per_char": -1.003601312637329, "num_chars": 8}, {"sum_logits": -5.345147132873535, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.930646896362305, "logits_per_token": -5.345147132873535, "logits_per_char": -1.0690294265747071, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 220, "native_id": "8-192", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.844604015350342, "incorrect_loss_raw": 5.710615237553914, "correct_loss_per_char": 0.9689208030700683, "incorrect_loss_per_char": 0.6521671822777501, "correct_loss_per_token": 4.844604015350342, "incorrect_loss_per_token": 5.075057625770569, "correct_loss_uncond": -4.7777886390686035, "incorrect_loss_uncond": -4.544265031814575}, "model_output": [{"sum_logits": -4.844604015350342, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.622392654418945, "logits_per_token": -4.844604015350342, "logits_per_char": -0.9689208030700683, "num_chars": 5}, {"sum_logits": -3.8133456707000732, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -11.09013557434082, "logits_per_token": -1.9066728353500366, "logits_per_char": -0.47666820883750916, "num_chars": 8}, {"sum_logits": -7.855668544769287, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -10.000263214111328, "logits_per_token": -7.855668544769287, "logits_per_char": -0.8728520605299208, "num_chars": 9}, {"sum_logits": -5.462831497192383, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.67424201965332, "logits_per_token": -5.462831497192383, "logits_per_char": -0.6069812774658203, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 221, "native_id": "7-1108", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.96575927734375, "incorrect_loss_raw": 18.754762013753254, "correct_loss_per_char": 0.8813458610983456, "incorrect_loss_per_char": 0.651561275347913, "correct_loss_per_token": 5.99315185546875, "incorrect_loss_per_token": 3.847915013631185, "correct_loss_uncond": -4.4245147705078125, "incorrect_loss_uncond": -10.82027498881022}, "model_output": [{"sum_logits": -30.26791763305664, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -37.08943176269531, "logits_per_token": -5.044652938842773, "logits_per_char": -0.8647976466587611, "num_chars": 35}, {"sum_logits": -11.61232852935791, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -25.87139892578125, "logits_per_token": -2.9030821323394775, "logits_per_char": -0.4644931411743164, "num_chars": 25}, {"sum_logits": -14.384039878845215, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -25.764280319213867, "logits_per_token": -3.5960099697113037, "logits_per_char": -0.6253930382106615, "num_chars": 23}, {"sum_logits": -29.96575927734375, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -34.39027404785156, "logits_per_token": -5.99315185546875, "logits_per_char": -0.8813458610983456, "num_chars": 34}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 222, "native_id": "7-852", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.762334823608398, "incorrect_loss_raw": 14.775251706441244, "correct_loss_per_char": 0.48919703743674536, "incorrect_loss_per_char": 0.6367508052575468, "correct_loss_per_token": 2.15246696472168, "incorrect_loss_per_token": 2.9550503412882487, "correct_loss_uncond": -22.26334571838379, "incorrect_loss_uncond": -15.4251495997111}, "model_output": [{"sum_logits": -10.762334823608398, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -33.02568054199219, "logits_per_token": -2.15246696472168, "logits_per_char": -0.48919703743674536, "num_chars": 22}, {"sum_logits": -10.284771919250488, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -29.361125946044922, "logits_per_token": -2.0569543838500977, "logits_per_char": -0.4285321633021037, "num_chars": 24}, {"sum_logits": -17.317651748657227, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.82004165649414, "logits_per_token": -3.463530349731445, "logits_per_char": -0.7215688228607178, "num_chars": 24}, {"sum_logits": -16.723331451416016, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -32.42003631591797, "logits_per_token": -3.344666290283203, "logits_per_char": -0.7601514296098189, "num_chars": 22}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 223, "native_id": "761", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.836263656616211, "incorrect_loss_raw": 10.263675371805826, "correct_loss_per_char": 1.3151404062906902, "incorrect_loss_per_char": 1.2829594214757283, "correct_loss_per_token": 11.836263656616211, "incorrect_loss_per_token": 10.263675371805826, "correct_loss_uncond": -0.5726003646850586, "incorrect_loss_uncond": -0.3556219736735026}, "model_output": [{"sum_logits": -10.75968074798584, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.799592018127441, "logits_per_token": -10.75968074798584, "logits_per_char": -1.34496009349823, "num_chars": 8}, {"sum_logits": -11.107221603393555, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.438045501708984, "logits_per_token": -11.107221603393555, "logits_per_char": -1.3884027004241943, "num_chars": 8}, {"sum_logits": -8.924123764038086, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.620254516601562, "logits_per_token": -8.924123764038086, "logits_per_char": -1.1155154705047607, "num_chars": 8}, {"sum_logits": -11.836263656616211, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.40886402130127, "logits_per_token": -11.836263656616211, "logits_per_char": -1.3151404062906902, "num_chars": 9}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 224, "native_id": "8-318", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.816044807434082, "incorrect_loss_raw": 13.377712885538736, "correct_loss_per_char": 0.5313863387474647, "incorrect_loss_per_char": 0.6594435149170333, "correct_loss_per_token": 2.7632089614868165, "incorrect_loss_per_token": 3.344428221384684, "correct_loss_uncond": -15.413151741027832, "incorrect_loss_uncond": -14.176284154256185}, "model_output": [{"sum_logits": -15.255599975585938, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -26.29206085205078, "logits_per_token": -3.8138999938964844, "logits_per_char": -0.8475333319769965, "num_chars": 18}, {"sum_logits": -13.816044807434082, "num_tokens": 5, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -29.229196548461914, "logits_per_token": -2.7632089614868165, "logits_per_char": -0.5313863387474647, "num_chars": 26}, {"sum_logits": -13.070327758789062, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -28.00533676147461, "logits_per_token": -3.2675819396972656, "logits_per_char": -0.5941058072176847, "num_chars": 22}, {"sum_logits": -11.807210922241211, "num_tokens": 4, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -28.364593505859375, "logits_per_token": -2.9518027305603027, "logits_per_char": -0.5366914055564187, "num_chars": 22}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 225, "native_id": "636", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.452248573303223, "incorrect_loss_raw": 7.801188627878825, "correct_loss_per_char": 0.8280276192559136, "incorrect_loss_per_char": 1.0613074189140683, "correct_loss_per_token": 2.4840828577677407, "incorrect_loss_per_token": 4.992867390314738, "correct_loss_uncond": -8.292431831359863, "incorrect_loss_uncond": -8.660032749176025}, "model_output": [{"sum_logits": -7.921046733856201, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.290300369262695, "logits_per_token": -3.9605233669281006, "logits_per_char": -1.1315781048366003, "num_chars": 7}, {"sum_logits": -8.92888069152832, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.931983947753906, "logits_per_token": -4.46444034576416, "logits_per_char": -1.11611008644104, "num_chars": 8}, {"sum_logits": -7.452248573303223, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.744680404663086, "logits_per_token": -2.4840828577677407, "logits_per_char": -0.8280276192559136, "num_chars": 9}, {"sum_logits": -6.553638458251953, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -15.16137981414795, "logits_per_token": -6.553638458251953, "logits_per_char": -0.9362340654645648, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 226, "native_id": "7-444", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 17.60626220703125, "incorrect_loss_raw": 20.116200129191082, "correct_loss_per_char": 1.0356624827665442, "incorrect_loss_per_char": 1.0967492684374114, "correct_loss_per_token": 3.52125244140625, "incorrect_loss_per_token": 6.705400043063693, "correct_loss_uncond": -12.679512023925781, "incorrect_loss_uncond": -5.463250478108724}, "model_output": [{"sum_logits": -15.611246109008789, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -26.771835327148438, "logits_per_token": -5.20374870300293, "logits_per_char": -0.9183085946475759, "num_chars": 17}, {"sum_logits": -16.23478126525879, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -21.1319637298584, "logits_per_token": -5.411593755086263, "logits_per_char": -1.0146738290786743, "num_chars": 16}, {"sum_logits": -28.502573013305664, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -28.834552764892578, "logits_per_token": -9.500857671101889, "logits_per_char": -1.357265381585984, "num_chars": 21}, {"sum_logits": -17.60626220703125, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -30.28577423095703, "logits_per_token": -3.52125244140625, "logits_per_char": -1.0356624827665442, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 227, "native_id": "8-57", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 33.522438049316406, "incorrect_loss_raw": 31.507686614990234, "correct_loss_per_char": 0.8176204402272295, "incorrect_loss_per_char": 0.8245543692588065, "correct_loss_per_token": 4.788919721330915, "incorrect_loss_per_token": 5.099780132152415, "correct_loss_uncond": -17.11446762084961, "incorrect_loss_uncond": -9.748554229736328}, "model_output": [{"sum_logits": -31.122085571289062, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -38.76332473754883, "logits_per_token": -5.187014261881511, "logits_per_char": -0.798002194135617, "num_chars": 39}, {"sum_logits": -33.522438049316406, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -50.636905670166016, "logits_per_token": -4.788919721330915, "logits_per_char": -0.8176204402272295, "num_chars": 41}, {"sum_logits": -34.512451171875, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -41.464515686035156, "logits_per_token": -6.902490234375, "logits_per_char": -1.1504150390625, "num_chars": 30}, {"sum_logits": -28.88852310180664, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -43.5408821105957, "logits_per_token": -3.209835900200738, "logits_per_char": -0.5252458745783025, "num_chars": 55}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 228, "native_id": "9-187", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.075244665145874, "incorrect_loss_raw": 6.400188604990642, "correct_loss_per_char": 0.25940558314323425, "incorrect_loss_per_char": 0.8021145968210129, "correct_loss_per_token": 2.075244665145874, "incorrect_loss_per_token": 4.865920225779216, "correct_loss_uncond": -10.631274461746216, "incorrect_loss_uncond": -7.744977156321208}, "model_output": [{"sum_logits": -6.904207706451416, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.36471176147461, "logits_per_token": -2.3014025688171387, "logits_per_char": -0.4931576933179583, "num_chars": 14}, {"sum_logits": -2.075244665145874, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": true, "sum_logits_uncond": -12.70651912689209, "logits_per_token": -2.075244665145874, "logits_per_char": -0.25940558314323425, "num_chars": 8}, {"sum_logits": -7.281140327453613, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.998348236083984, "logits_per_token": -7.281140327453613, "logits_per_char": -0.9101425409317017, "num_chars": 8}, {"sum_logits": -5.0152177810668945, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.072437286376953, "logits_per_token": -5.0152177810668945, "logits_per_char": -1.0030435562133788, "num_chars": 5}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 229, "native_id": "1345", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.325471878051758, "incorrect_loss_raw": 13.190027236938477, "correct_loss_per_char": 0.6416841436315466, "incorrect_loss_per_char": 0.8203011830647786, "correct_loss_per_token": 4.3313679695129395, "incorrect_loss_per_token": 4.313885847727458, "correct_loss_uncond": -11.952795028686523, "incorrect_loss_uncond": -11.126338958740234}, "model_output": [{"sum_logits": -18.271791458129883, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -33.603004455566406, "logits_per_token": -4.567947864532471, "logits_per_char": -0.9135895729064941, "num_chars": 20}, {"sum_logits": -17.325471878051758, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -29.27826690673828, "logits_per_token": -4.3313679695129395, "logits_per_char": -0.6416841436315466, "num_chars": 27}, {"sum_logits": -13.652612686157227, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -20.089529037475586, "logits_per_token": -4.550870895385742, "logits_per_char": -0.9101741790771485, "num_chars": 15}, {"sum_logits": -7.64567756652832, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.25656509399414, "logits_per_token": -3.82283878326416, "logits_per_char": -0.6371397972106934, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 230, "native_id": "8-59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.70244789123535, "incorrect_loss_raw": 17.880125681559246, "correct_loss_per_char": 0.7953546614873976, "incorrect_loss_per_char": 0.7972419952927975, "correct_loss_per_token": 3.3404895782470705, "incorrect_loss_per_token": 3.6309327973259813, "correct_loss_uncond": -4.92717170715332, "incorrect_loss_uncond": -6.269903818766276}, "model_output": [{"sum_logits": -15.639314651489258, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -21.01740264892578, "logits_per_token": -3.9098286628723145, "logits_per_char": -0.6255725860595703, "num_chars": 25}, {"sum_logits": -18.517282485961914, "num_tokens": 6, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -24.29971694946289, "logits_per_token": -3.086213747660319, "logits_per_char": -0.7406912994384766, "num_chars": 25}, {"sum_logits": -19.483779907226562, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -27.13296890258789, "logits_per_token": -3.8967559814453123, "logits_per_char": -1.0254621003803455, "num_chars": 19}, {"sum_logits": -16.70244789123535, "num_tokens": 5, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -21.629619598388672, "logits_per_token": -3.3404895782470705, "logits_per_char": -0.7953546614873976, "num_chars": 21}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 231, "native_id": "178", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.594672441482544, "incorrect_loss_raw": 5.465172449747722, "correct_loss_per_char": 0.648668110370636, "incorrect_loss_per_char": 1.1842249552408852, "correct_loss_per_token": 2.594672441482544, "incorrect_loss_per_token": 5.465172449747722, "correct_loss_uncond": -10.322937726974487, "incorrect_loss_uncond": -6.938432057698567}, "model_output": [{"sum_logits": -5.471427917480469, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -5.471427917480469, "logits_per_char": -1.3678569793701172, "num_chars": 4}, {"sum_logits": -2.594672441482544, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.917610168457031, "logits_per_token": -2.594672441482544, "logits_per_char": -0.648668110370636, "num_chars": 4}, {"sum_logits": -5.120123863220215, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.236160278320312, "logits_per_token": -5.120123863220215, "logits_per_char": -1.024024772644043, "num_chars": 5}, {"sum_logits": -5.8039655685424805, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.802062034606934, "logits_per_token": -5.8039655685424805, "logits_per_char": -1.1607931137084961, "num_chars": 5}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 232, "native_id": "9-1186", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.806278228759766, "incorrect_loss_raw": 10.499547004699707, "correct_loss_per_char": 1.414734159197126, "incorrect_loss_per_char": 1.2815354735762985, "correct_loss_per_token": 4.951569557189941, "incorrect_loss_per_token": 6.205533769395617, "correct_loss_uncond": -0.6152458190917969, "incorrect_loss_uncond": -0.9897311528523763}, "model_output": [{"sum_logits": -19.806278228759766, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -20.421524047851562, "logits_per_token": -4.951569557189941, "logits_per_char": -1.414734159197126, "num_chars": 14}, {"sum_logits": -10.915754318237305, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.014161109924316, "logits_per_token": -5.457877159118652, "logits_per_char": -1.212861590915256, "num_chars": 9}, {"sum_logits": -11.13624382019043, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.657112121582031, "logits_per_token": -3.71208127339681, "logits_per_char": -0.742416254679362, "num_chars": 15}, {"sum_logits": -9.446642875671387, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -10.796561241149902, "logits_per_token": -9.446642875671387, "logits_per_char": -1.8893285751342774, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 233, "native_id": "82", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.521461486816406, "incorrect_loss_raw": 5.491483370463054, "correct_loss_per_char": 0.7887802124023438, "incorrect_loss_per_char": 0.7748908069398667, "correct_loss_per_token": 5.521461486816406, "incorrect_loss_per_token": 5.491483370463054, "correct_loss_uncond": -6.233526229858398, "incorrect_loss_uncond": -8.042685508728027}, "model_output": [{"sum_logits": -6.36878776550293, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.200847625732422, "logits_per_token": -6.36878776550293, "logits_per_char": -1.0614646275838215, "num_chars": 6}, {"sum_logits": -2.3803539276123047, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": true, "sum_logits_uncond": -14.65502643585205, "logits_per_token": -2.3803539276123047, "logits_per_char": -0.2975442409515381, "num_chars": 8}, {"sum_logits": -5.521461486816406, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -11.754987716674805, "logits_per_token": -5.521461486816406, "logits_per_char": -0.7887802124023438, "num_chars": 7}, {"sum_logits": -7.725308418273926, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -12.74663257598877, "logits_per_token": -7.725308418273926, "logits_per_char": -0.9656635522842407, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 234, "native_id": "8-165", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 47.499603271484375, "incorrect_loss_raw": 34.909741719563804, "correct_loss_per_char": 0.7089493025594683, "incorrect_loss_per_char": 0.8941631366208274, "correct_loss_per_token": 3.6538156362680287, "incorrect_loss_per_token": 3.5879716455735746, "correct_loss_uncond": -13.715911865234375, "incorrect_loss_uncond": -4.773671468098958}, "model_output": [{"sum_logits": -33.15540313720703, "num_tokens": 9, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -41.53504180908203, "logits_per_token": -3.6839336819118924, "logits_per_char": -0.9751589158002067, "num_chars": 34}, {"sum_logits": -28.376873016357422, "num_tokens": 9, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -32.03106689453125, "logits_per_token": -3.1529858907063804, "logits_per_char": -0.7882464726765951, "num_chars": 36}, {"sum_logits": -47.499603271484375, "num_tokens": 13, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -61.21551513671875, "logits_per_token": -3.6538156362680287, "logits_per_char": -0.7089493025594683, "num_chars": 67}, {"sum_logits": -43.19694900512695, "num_tokens": 11, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -45.484130859375, "logits_per_token": -3.92699536410245, "logits_per_char": -0.9190840213856799, "num_chars": 47}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 235, "native_id": "404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.473417282104492, "incorrect_loss_raw": 8.13037633895874, "correct_loss_per_char": 1.0473417282104491, "incorrect_loss_per_char": 0.6962639466680661, "correct_loss_per_token": 3.4911390940348306, "incorrect_loss_per_token": 5.388677040735881, "correct_loss_uncond": -7.439348220825195, "incorrect_loss_uncond": -7.60000212987264}, "model_output": [{"sum_logits": -7.4181036949157715, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.182392120361328, "logits_per_token": -3.7090518474578857, "logits_per_char": -0.674373063174161, "num_chars": 11}, {"sum_logits": -10.473417282104492, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.912765502929688, "logits_per_token": -3.4911390940348306, "logits_per_char": -1.0473417282104491, "num_chars": 10}, {"sum_logits": -7.9409332275390625, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.715766906738281, "logits_per_token": -7.9409332275390625, "logits_per_char": -0.6617444356282552, "num_chars": 12}, {"sum_logits": -9.032092094421387, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.29297637939453, "logits_per_token": -4.516046047210693, "logits_per_char": -0.7526743412017822, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 236, "native_id": "279", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.298539161682129, "incorrect_loss_raw": 15.928740819295248, "correct_loss_per_char": 1.3298539161682128, "incorrect_loss_per_char": 1.7441974248641576, "correct_loss_per_token": 6.6492695808410645, "incorrect_loss_per_token": 10.0724409421285, "correct_loss_uncond": -1.038294792175293, "incorrect_loss_uncond": -2.3934653600056968}, "model_output": [{"sum_logits": -13.298539161682129, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.336833953857422, "logits_per_token": -6.6492695808410645, "logits_per_char": -1.3298539161682128, "num_chars": 10}, {"sum_logits": -12.648423194885254, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.90964412689209, "logits_per_token": -12.648423194885254, "logits_per_char": -2.529684638977051, "num_chars": 5}, {"sum_logits": -15.29272174835205, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.344898223876953, "logits_per_token": -7.646360874176025, "logits_per_char": -1.1763632114116962, "num_chars": 13}, {"sum_logits": -19.845077514648438, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -22.71207618713379, "logits_per_token": -9.922538757324219, "logits_per_char": -1.526544424203726, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 237, "native_id": "9-532", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.541828155517578, "incorrect_loss_raw": 7.587190310160319, "correct_loss_per_char": 1.7083656311035156, "incorrect_loss_per_char": 1.1806670325142996, "correct_loss_per_token": 8.541828155517578, "incorrect_loss_per_token": 5.857139269510905, "correct_loss_uncond": -3.6541748046875, "incorrect_loss_uncond": -6.323829332987468}, "model_output": [{"sum_logits": -8.541828155517578, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.196002960205078, "logits_per_token": -8.541828155517578, "logits_per_char": -1.7083656311035156, "num_chars": 5}, {"sum_logits": -7.300172805786133, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.974124908447266, "logits_per_token": -7.300172805786133, "logits_per_char": -1.0428818293980189, "num_chars": 7}, {"sum_logits": -10.380306243896484, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.128332138061523, "logits_per_token": -5.190153121948242, "logits_per_char": -1.482900891985212, "num_chars": 7}, {"sum_logits": -5.08109188079834, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.63060188293457, "logits_per_token": -5.08109188079834, "logits_per_char": -1.016218376159668, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 238, "native_id": "268", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.04390525817871, "incorrect_loss_raw": 18.571879704793293, "correct_loss_per_char": 0.683997689223871, "incorrect_loss_per_char": 0.536412721096488, "correct_loss_per_token": 3.115989473130968, "incorrect_loss_per_token": 2.5141221417321096, "correct_loss_uncond": -18.267370223999023, "incorrect_loss_uncond": -16.455342610677082}, "model_output": [{"sum_logits": -20.40552520751953, "num_tokens": 8, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.95421600341797, "logits_per_token": -2.5506906509399414, "logits_per_char": -0.5515006812843116, "num_chars": 37}, {"sum_logits": -13.869876861572266, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -30.10854148864746, "logits_per_token": -2.3116461435953775, "logits_per_char": -0.47827161591628503, "num_chars": 29}, {"sum_logits": -28.04390525817871, "num_tokens": 9, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -46.311275482177734, "logits_per_token": -3.115989473130968, "logits_per_char": -0.683997689223871, "num_chars": 41}, {"sum_logits": -21.440237045288086, "num_tokens": 8, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -38.0189094543457, "logits_per_token": -2.6800296306610107, "logits_per_char": -0.5794658660888672, "num_chars": 37}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 239, "native_id": "7-1018", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.010169982910156, "incorrect_loss_raw": 19.875094731648762, "correct_loss_per_char": 1.3080834608811598, "incorrect_loss_per_char": 0.766032985267409, "correct_loss_per_token": 5.668361663818359, "incorrect_loss_per_token": 4.6474841117858885, "correct_loss_uncond": -7.820064544677734, "incorrect_loss_uncond": -8.16761843363444}, "model_output": [{"sum_logits": -16.24949836730957, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -30.365856170654297, "logits_per_token": -4.062374591827393, "logits_per_char": -0.7064999290134596, "num_chars": 23}, {"sum_logits": -19.277374267578125, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -25.303359985351562, "logits_per_token": -3.855474853515625, "logits_per_char": -0.6647370437095905, "num_chars": 29}, {"sum_logits": -24.098411560058594, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -28.45892333984375, "logits_per_token": -6.024602890014648, "logits_per_char": -0.9268619830791767, "num_chars": 26}, {"sum_logits": -34.010169982910156, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -41.83023452758789, "logits_per_token": -5.668361663818359, "logits_per_char": -1.3080834608811598, "num_chars": 26}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 240, "native_id": "1756", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.865455627441406, "incorrect_loss_raw": 9.821455319722494, "correct_loss_per_char": 2.373091125488281, "incorrect_loss_per_char": 1.3554538938734266, "correct_loss_per_token": 5.932727813720703, "incorrect_loss_per_token": 5.94104544321696, "correct_loss_uncond": -4.787771224975586, "incorrect_loss_uncond": -4.798532485961914}, "model_output": [{"sum_logits": -11.865455627441406, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -16.653226852416992, "logits_per_token": -5.932727813720703, "logits_per_char": -2.373091125488281, "num_chars": 5}, {"sum_logits": -9.944914817810059, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -12.99838638305664, "logits_per_token": -9.944914817810059, "logits_per_char": -1.9889829635620118, "num_chars": 5}, {"sum_logits": -8.230426788330078, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -14.942266464233398, "logits_per_token": -4.115213394165039, "logits_per_char": -0.8230426788330079, "num_chars": 10}, {"sum_logits": -11.289024353027344, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.919310569763184, "logits_per_token": -3.7630081176757812, "logits_per_char": -1.2543360392252605, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 241, "native_id": "1137", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.955410957336426, "incorrect_loss_raw": 8.416250228881836, "correct_loss_per_char": 1.1194263696670532, "incorrect_loss_per_char": 1.2571041345596312, "correct_loss_per_token": 4.477705478668213, "incorrect_loss_per_token": 8.416250228881836, "correct_loss_uncond": -7.215287208557129, "incorrect_loss_uncond": -4.9938303629557295}, "model_output": [{"sum_logits": -8.853402137756348, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.946208000183105, "logits_per_token": -8.853402137756348, "logits_per_char": -1.1066752672195435, "num_chars": 8}, {"sum_logits": -8.202914237976074, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -11.676148414611816, "logits_per_token": -8.202914237976074, "logits_per_char": -1.6405828475952149, "num_chars": 5}, {"sum_logits": -8.192434310913086, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.607885360717773, "logits_per_token": -8.192434310913086, "logits_per_char": -1.0240542888641357, "num_chars": 8}, {"sum_logits": -8.955410957336426, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.170698165893555, "logits_per_token": -4.477705478668213, "logits_per_char": -1.1194263696670532, "num_chars": 8}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 242, "native_id": "7-203", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.138587951660156, "incorrect_loss_raw": 24.75859769185384, "correct_loss_per_char": 1.0051328870985243, "incorrect_loss_per_char": 0.891281024527637, "correct_loss_per_token": 4.523097991943359, "incorrect_loss_per_token": 3.9426458600967655, "correct_loss_uncond": -2.6401634216308594, "incorrect_loss_uncond": -4.846595128377278}, "model_output": [{"sum_logits": -21.147907257080078, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -28.59203338623047, "logits_per_token": -3.524651209513346, "logits_per_char": -0.8133810483492337, "num_chars": 26}, {"sum_logits": -23.15717315673828, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.719379425048828, "logits_per_token": -3.3081675938197543, "logits_per_char": -0.6616335187639509, "num_chars": 35}, {"sum_logits": -29.970712661743164, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -32.50416564941406, "logits_per_token": -4.995118776957194, "logits_per_char": -1.1988285064697266, "num_chars": 25}, {"sum_logits": -27.138587951660156, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -29.778751373291016, "logits_per_token": -4.523097991943359, "logits_per_char": -1.0051328870985243, "num_chars": 27}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 243, "native_id": "745", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.849895477294922, "incorrect_loss_raw": 5.490482489267985, "correct_loss_per_char": 0.5499850681849888, "incorrect_loss_per_char": 0.9747249497307671, "correct_loss_per_token": 3.849895477294922, "incorrect_loss_per_token": 5.490482489267985, "correct_loss_uncond": -9.16692066192627, "incorrect_loss_uncond": -6.007496039072673}, "model_output": [{"sum_logits": -5.368008136749268, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.073330879211426, "logits_per_token": -5.368008136749268, "logits_per_char": -1.0736016273498534, "num_chars": 5}, {"sum_logits": -3.849895477294922, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.016816139221191, "logits_per_token": -3.849895477294922, "logits_per_char": -0.5499850681849888, "num_chars": 7}, {"sum_logits": -4.2821760177612305, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.2821760177612305, "logits_per_char": -0.7136960029602051, "num_chars": 6}, {"sum_logits": -6.821263313293457, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -6.821263313293457, "logits_per_char": -1.136877218882243, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 244, "native_id": "7-902", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.007780075073242, "incorrect_loss_raw": 18.05469290415446, "correct_loss_per_char": 0.611543337504069, "incorrect_loss_per_char": 0.9502469949554979, "correct_loss_per_token": 3.669260025024414, "incorrect_loss_per_token": 6.018230968051487, "correct_loss_uncond": -15.327791213989258, "incorrect_loss_uncond": -8.41398843129476}, "model_output": [{"sum_logits": -11.007780075073242, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -26.3355712890625, "logits_per_token": -3.669260025024414, "logits_per_char": -0.611543337504069, "num_chars": 18}, {"sum_logits": -20.494604110717773, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -26.409324645996094, "logits_per_token": -6.831534703572591, "logits_per_char": -1.0786633742483038, "num_chars": 19}, {"sum_logits": -19.79216957092285, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -24.97624969482422, "logits_per_token": -6.597389856974284, "logits_per_char": -1.0416931353117291, "num_chars": 19}, {"sum_logits": -13.877305030822754, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -28.020469665527344, "logits_per_token": -4.625768343607585, "logits_per_char": -0.7303844753064608, "num_chars": 19}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 245, "native_id": "1095", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 20.822555541992188, "incorrect_loss_raw": 26.362154642740887, "correct_loss_per_char": 0.7180191566204203, "incorrect_loss_per_char": 1.0842329358297682, "correct_loss_per_token": 5.205638885498047, "incorrect_loss_per_token": 6.590538660685222, "correct_loss_uncond": -17.26919174194336, "incorrect_loss_uncond": -6.362445831298828}, "model_output": [{"sum_logits": -20.822555541992188, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -38.09174728393555, "logits_per_token": -5.205638885498047, "logits_per_char": -0.7180191566204203, "num_chars": 29}, {"sum_logits": -22.792003631591797, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -32.81572341918945, "logits_per_token": -5.698000907897949, "logits_per_char": -1.0853335062662761, "num_chars": 21}, {"sum_logits": -26.350608825683594, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -36.5895881652832, "logits_per_token": -6.587652206420898, "logits_per_char": -1.0979420344034831, "num_chars": 24}, {"sum_logits": -29.943851470947266, "num_tokens": 4, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -28.768489837646484, "logits_per_token": -7.485962867736816, "logits_per_char": -1.0694232668195451, "num_chars": 28}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 246, "native_id": "7-163", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.276397705078125, "incorrect_loss_raw": 25.715588251749676, "correct_loss_per_char": 0.9698499043782552, "incorrect_loss_per_char": 1.0259860320685288, "correct_loss_per_token": 5.819099426269531, "incorrect_loss_per_token": 6.428897062937419, "correct_loss_uncond": -5.624576568603516, "incorrect_loss_uncond": -4.524841944376628}, "model_output": [{"sum_logits": -23.276397705078125, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -28.90097427368164, "logits_per_token": -5.819099426269531, "logits_per_char": -0.9698499043782552, "num_chars": 24}, {"sum_logits": -18.17314910888672, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -27.811546325683594, "logits_per_token": -4.54328727722168, "logits_per_char": -0.7269259643554687, "num_chars": 25}, {"sum_logits": -19.818708419799805, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -23.675113677978516, "logits_per_token": -4.954677104949951, "logits_per_char": -0.900850382718173, "num_chars": 22}, {"sum_logits": -39.1549072265625, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -39.2346305847168, "logits_per_token": -9.788726806640625, "logits_per_char": -1.4501817491319444, "num_chars": 27}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 247, "native_id": "9-858", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.250890731811523, "incorrect_loss_raw": 8.833088239034018, "correct_loss_per_char": 1.1563613414764404, "incorrect_loss_per_char": 1.0848016920543853, "correct_loss_per_token": 4.625445365905762, "incorrect_loss_per_token": 8.833088239034018, "correct_loss_uncond": -8.139707565307617, "incorrect_loss_uncond": -4.8007857004801435}, "model_output": [{"sum_logits": -5.863345146179199, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.008694648742676, "logits_per_token": -5.863345146179199, "logits_per_char": -0.837620735168457, "num_chars": 7}, {"sum_logits": -8.241155624389648, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.572688102722168, "logits_per_token": -8.241155624389648, "logits_per_char": -1.1773079463413783, "num_chars": 7}, {"sum_logits": -9.250890731811523, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.39059829711914, "logits_per_token": -4.625445365905762, "logits_per_char": -1.1563613414764404, "num_chars": 8}, {"sum_logits": -12.394763946533203, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.320239067077637, "logits_per_token": -12.394763946533203, "logits_per_char": -1.2394763946533203, "num_chars": 10}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 248, "native_id": "1530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.064823150634766, "incorrect_loss_raw": 9.968945503234863, "correct_loss_per_char": 1.5108038584391277, "incorrect_loss_per_char": 1.0868280307211058, "correct_loss_per_token": 9.064823150634766, "incorrect_loss_per_token": 9.968945503234863, "correct_loss_uncond": -3.76633358001709, "incorrect_loss_uncond": -3.9637486139933267}, "model_output": [{"sum_logits": -10.648140907287598, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.039393424987793, "logits_per_token": -10.648140907287598, "logits_per_char": -1.3310176134109497, "num_chars": 8}, {"sum_logits": -8.844460487365723, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.420456886291504, "logits_per_token": -8.844460487365723, "logits_per_char": -0.9827178319295248, "num_chars": 9}, {"sum_logits": -10.41423511505127, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.338232040405273, "logits_per_token": -10.41423511505127, "logits_per_char": -0.9467486468228427, "num_chars": 11}, {"sum_logits": -9.064823150634766, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.831156730651855, "logits_per_token": -9.064823150634766, "logits_per_char": -1.5108038584391277, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 249, "native_id": "9-993", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.151135444641113, "incorrect_loss_raw": 7.788774490356445, "correct_loss_per_char": 1.0251892407735188, "incorrect_loss_per_char": 1.2981290817260742, "correct_loss_per_token": 6.151135444641113, "incorrect_loss_per_token": 7.788774490356445, "correct_loss_uncond": -5.905396461486816, "incorrect_loss_uncond": -4.653595606486003}, "model_output": [{"sum_logits": -9.596403121948242, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.233739852905273, "logits_per_token": -9.596403121948242, "logits_per_char": -1.599400520324707, "num_chars": 6}, {"sum_logits": -6.196686744689941, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -6.196686744689941, "logits_per_char": -1.0327811241149902, "num_chars": 6}, {"sum_logits": -7.573233604431152, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.204378128051758, "logits_per_token": -7.573233604431152, "logits_per_char": -1.2622056007385254, "num_chars": 6}, {"sum_logits": -6.151135444641113, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.05653190612793, "logits_per_token": -6.151135444641113, "logits_per_char": -1.0251892407735188, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 250, "native_id": "8-340", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.353462219238281, "incorrect_loss_raw": 10.800294558207193, "correct_loss_per_char": 0.8345913887023926, "incorrect_loss_per_char": 0.8054208240811788, "correct_loss_per_token": 6.676731109619141, "incorrect_loss_per_token": 4.28930123647054, "correct_loss_uncond": -6.728420257568359, "incorrect_loss_uncond": -6.643679618835449}, "model_output": [{"sum_logits": -10.382955551147461, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.137557983398438, "logits_per_token": -3.4609851837158203, "logits_per_char": -0.7416396822248187, "num_chars": 14}, {"sum_logits": -9.612273216247559, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.48298454284668, "logits_per_token": -3.2040910720825195, "logits_per_char": -0.6408182144165039, "num_chars": 15}, {"sum_logits": -12.405654907226562, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -20.711380004882812, "logits_per_token": -6.202827453613281, "logits_per_char": -1.0338045756022136, "num_chars": 12}, {"sum_logits": -13.353462219238281, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -20.08188247680664, "logits_per_token": -6.676731109619141, "logits_per_char": -0.8345913887023926, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 251, "native_id": "3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.262568473815918, "incorrect_loss_raw": 11.248438040415445, "correct_loss_per_char": 1.0262568473815918, "incorrect_loss_per_char": 1.709741380479601, "correct_loss_per_token": 5.131284236907959, "incorrect_loss_per_token": 8.919358730316162, "correct_loss_uncond": -5.053018569946289, "incorrect_loss_uncond": 0.9818167686462402}, "model_output": [{"sum_logits": -11.879877090454102, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -10.230915069580078, "logits_per_token": -11.879877090454102, "logits_per_char": -1.4849846363067627, "num_chars": 8}, {"sum_logits": -10.262568473815918, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.315587043762207, "logits_per_token": -5.131284236907959, "logits_per_char": -1.0262568473815918, "num_chars": 10}, {"sum_logits": -7.890961170196533, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -8.184921264648438, "logits_per_token": -7.890961170196533, "logits_per_char": -1.3151601950327556, "num_chars": 6}, {"sum_logits": -13.974475860595703, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.384027481079102, "logits_per_token": -6.987237930297852, "logits_per_char": -2.3290793100992837, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 252, "native_id": "1074", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.856602668762207, "incorrect_loss_raw": 7.040258248647054, "correct_loss_per_char": 0.9761004447937012, "incorrect_loss_per_char": 1.1287411703003778, "correct_loss_per_token": 5.856602668762207, "incorrect_loss_per_token": 7.040258248647054, "correct_loss_uncond": -6.675009727478027, "incorrect_loss_uncond": -5.315125306447347}, "model_output": [{"sum_logits": -5.454128265380859, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.073330879211426, "logits_per_token": -5.454128265380859, "logits_per_char": -1.090825653076172, "num_chars": 5}, {"sum_logits": -7.577037334442139, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.59320068359375, "logits_per_token": -7.577037334442139, "logits_per_char": -0.9471296668052673, "num_chars": 8}, {"sum_logits": -5.856602668762207, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.531612396240234, "logits_per_token": -5.856602668762207, "logits_per_char": -0.9761004447937012, "num_chars": 6}, {"sum_logits": -8.089609146118164, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.399619102478027, "logits_per_token": -8.089609146118164, "logits_per_char": -1.348268191019694, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 253, "native_id": "9-431", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.560545921325684, "incorrect_loss_raw": 9.40429973602295, "correct_loss_per_char": 0.5560545921325684, "incorrect_loss_per_char": 0.9355111828556767, "correct_loss_per_token": 5.560545921325684, "incorrect_loss_per_token": 6.77843549516466, "correct_loss_uncond": -8.31057071685791, "incorrect_loss_uncond": -4.0583845774332685}, "model_output": [{"sum_logits": -5.560545921325684, "num_tokens": 1, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -13.871116638183594, "logits_per_token": -5.560545921325684, "logits_per_char": -0.5560545921325684, "num_chars": 10}, {"sum_logits": -7.464715003967285, "num_tokens": 1, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -13.439889907836914, "logits_per_token": -7.464715003967285, "logits_per_char": -0.8294127782185873, "num_chars": 9}, {"sum_logits": -11.816389083862305, "num_tokens": 3, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -12.851141929626465, "logits_per_token": -3.938796361287435, "logits_per_char": -0.9846990903218588, "num_chars": 12}, {"sum_logits": -8.931795120239258, "num_tokens": 1, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -14.097021102905273, "logits_per_token": -8.931795120239258, "logits_per_char": -0.9924216800265842, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 254, "native_id": "9-638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.797323226928711, "incorrect_loss_raw": 8.13746945063273, "correct_loss_per_char": 0.2531548817952474, "incorrect_loss_per_char": 0.5021056229819353, "correct_loss_per_token": 3.797323226928711, "incorrect_loss_per_token": 4.068734725316365, "correct_loss_uncond": -9.893261909484863, "incorrect_loss_uncond": -9.544760862986246}, "model_output": [{"sum_logits": -4.244892597198486, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.617727279663086, "logits_per_token": -2.122446298599243, "logits_per_char": -0.3858993270180442, "num_chars": 11}, {"sum_logits": -3.797323226928711, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -13.690585136413574, "logits_per_token": -3.797323226928711, "logits_per_char": -0.2531548817952474, "num_chars": 15}, {"sum_logits": -10.627961158752441, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -20.53169059753418, "logits_per_token": -5.313980579376221, "logits_per_char": -0.5904422865973579, "num_chars": 18}, {"sum_logits": -9.539554595947266, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.897273063659668, "logits_per_token": -4.769777297973633, "logits_per_char": -0.5299752553304037, "num_chars": 18}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 255, "native_id": "9-352", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.186563491821289, "incorrect_loss_raw": 15.154982248942057, "correct_loss_per_char": 0.6366602182388306, "incorrect_loss_per_char": 1.0234558551060167, "correct_loss_per_token": 3.3955211639404297, "incorrect_loss_per_token": 5.051660749647351, "correct_loss_uncond": -9.375484466552734, "incorrect_loss_uncond": -3.071129480997721}, "model_output": [{"sum_logits": -16.936729431152344, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -17.257184982299805, "logits_per_token": -5.645576477050781, "logits_per_char": -1.3028253408578725, "num_chars": 13}, {"sum_logits": -12.090503692626953, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -16.46533203125, "logits_per_token": -4.030167897542317, "logits_per_char": -0.6716946495903863, "num_chars": 18}, {"sum_logits": -16.437713623046875, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -20.95581817626953, "logits_per_token": -5.479237874348958, "logits_per_char": -1.0958475748697916, "num_chars": 15}, {"sum_logits": -10.186563491821289, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -19.562047958374023, "logits_per_token": -3.3955211639404297, "logits_per_char": -0.6366602182388306, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 256, "native_id": "226", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.957669258117676, "incorrect_loss_raw": 10.600075085957846, "correct_loss_per_char": 1.0957669258117675, "incorrect_loss_per_char": 0.7813404951340113, "correct_loss_per_token": 5.478834629058838, "incorrect_loss_per_token": 3.8528873125712075, "correct_loss_uncond": -4.5691118240356445, "incorrect_loss_uncond": -6.425695101420085}, "model_output": [{"sum_logits": -6.427140235900879, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.062032699584961, "logits_per_token": -3.2135701179504395, "logits_per_char": -0.6427140235900879, "num_chars": 10}, {"sum_logits": -8.007282257080078, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.59238052368164, "logits_per_token": -4.003641128540039, "logits_per_char": -0.6159447890061599, "num_chars": 13}, {"sum_logits": -17.365802764892578, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -22.422897338867188, "logits_per_token": -4.3414506912231445, "logits_per_char": -1.0853626728057861, "num_chars": 16}, {"sum_logits": -10.957669258117676, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.52678108215332, "logits_per_token": -5.478834629058838, "logits_per_char": -1.0957669258117675, "num_chars": 10}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 257, "native_id": "9-132", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.66541862487793, "incorrect_loss_raw": 8.127334912618002, "correct_loss_per_char": 1.1903870446341378, "incorrect_loss_per_char": 0.9707850319998604, "correct_loss_per_token": 5.555139541625977, "incorrect_loss_per_token": 6.4993917147318525, "correct_loss_uncond": -5.29084587097168, "incorrect_loss_uncond": -5.288205464680989}, "model_output": [{"sum_logits": -16.66541862487793, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -21.95626449584961, "logits_per_token": -5.555139541625977, "logits_per_char": -1.1903870446341378, "num_chars": 14}, {"sum_logits": -8.521770477294922, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.59320068359375, "logits_per_token": -8.521770477294922, "logits_per_char": -1.0652213096618652, "num_chars": 8}, {"sum_logits": -6.0925750732421875, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.181275367736816, "logits_per_token": -6.0925750732421875, "logits_per_char": -0.8703678676060268, "num_chars": 7}, {"sum_logits": -9.767659187316895, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -14.472145080566406, "logits_per_token": -4.883829593658447, "logits_per_char": -0.9767659187316895, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 258, "native_id": "9-222", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.101806640625, "incorrect_loss_raw": 20.339811325073242, "correct_loss_per_char": 0.6530474494485294, "incorrect_loss_per_char": 1.108952980308266, "correct_loss_per_token": 2.77545166015625, "incorrect_loss_per_token": 4.726404158274333, "correct_loss_uncond": -14.448211669921875, "incorrect_loss_uncond": -1.9170827865600586}, "model_output": [{"sum_logits": -18.68651580810547, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -28.671188354492188, "logits_per_token": -4.671628952026367, "logits_per_char": -0.7474606323242188, "num_chars": 25}, {"sum_logits": -21.512920379638672, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.69550132751465, "logits_per_token": -4.302584075927735, "logits_per_char": -0.9778600172563032, "num_chars": 22}, {"sum_logits": -20.819997787475586, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.403992652893066, "logits_per_token": -5.2049994468688965, "logits_per_char": -1.601538291344276, "num_chars": 13}, {"sum_logits": -11.101806640625, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -25.550018310546875, "logits_per_token": -2.77545166015625, "logits_per_char": -0.6530474494485294, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 259, "native_id": "9-105", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.828380584716797, "incorrect_loss_raw": 19.341875076293945, "correct_loss_per_char": 0.7714215031376591, "incorrect_loss_per_char": 0.9886183069034358, "correct_loss_per_token": 4.165676116943359, "incorrect_loss_per_token": 4.991617605421278, "correct_loss_uncond": -7.601722717285156, "incorrect_loss_uncond": -3.7881971995035806}, "model_output": [{"sum_logits": -24.713260650634766, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -33.298065185546875, "logits_per_token": -6.178315162658691, "logits_per_char": -0.8237753550211588, "num_chars": 30}, {"sum_logits": -17.3068790435791, "num_tokens": 5, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -24.12930679321289, "logits_per_token": -3.4613758087158204, "logits_per_char": -0.9108883707146895, "num_chars": 19}, {"sum_logits": -16.00548553466797, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -5.335161844889323, "logits_per_char": -1.2311911949744592, "num_chars": 13}, {"sum_logits": -20.828380584716797, "num_tokens": 5, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -28.430103302001953, "logits_per_token": -4.165676116943359, "logits_per_char": -0.7714215031376591, "num_chars": 27}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 260, "native_id": "7-459", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.041422367095947, "incorrect_loss_raw": 6.066263675689697, "correct_loss_per_char": 0.8082844734191894, "incorrect_loss_per_char": 1.1218667427698772, "correct_loss_per_token": 4.041422367095947, "incorrect_loss_per_token": 6.066263675689697, "correct_loss_uncond": -10.010131359100342, "incorrect_loss_uncond": -5.69445276260376}, "model_output": [{"sum_logits": -7.7784247398376465, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.677422523498535, "logits_per_token": -7.7784247398376465, "logits_per_char": -1.9446061849594116, "num_chars": 4}, {"sum_logits": -4.041422367095947, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -14.051553726196289, "logits_per_token": -4.041422367095947, "logits_per_char": -0.8082844734191894, "num_chars": 5}, {"sum_logits": -4.7371602058410645, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.7371602058410645, "logits_per_char": -0.7895267009735107, "num_chars": 6}, {"sum_logits": -5.683206081390381, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.715734481811523, "logits_per_token": -5.683206081390381, "logits_per_char": -0.631467342376709, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 261, "native_id": "9-881", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1688711643218994, "incorrect_loss_raw": 6.3004811604817705, "correct_loss_per_char": 0.15491936888013566, "incorrect_loss_per_char": 0.6723171642848423, "correct_loss_per_token": 2.1688711643218994, "incorrect_loss_per_token": 4.650863223605685, "correct_loss_uncond": -12.537039518356323, "incorrect_loss_uncond": -9.00328509012858}, "model_output": [{"sum_logits": -7.423280715942383, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -17.0803165435791, "logits_per_token": -2.4744269053141275, "logits_per_char": -0.5302343368530273, "num_chars": 14}, {"sum_logits": -2.1688711643218994, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": true, "sum_logits_uncond": -14.705910682678223, "logits_per_token": -2.1688711643218994, "logits_per_char": -0.15491936888013566, "num_chars": 14}, {"sum_logits": -3.5704755783081055, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.887076377868652, "logits_per_token": -3.5704755783081055, "logits_per_char": -0.35704755783081055, "num_chars": 10}, {"sum_logits": -7.907687187194824, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -14.9439058303833, "logits_per_token": -7.907687187194824, "logits_per_char": -1.1296695981706892, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 262, "native_id": "280", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.020406723022461, "incorrect_loss_raw": 16.45170529683431, "correct_loss_per_char": 1.1554159017709584, "incorrect_loss_per_char": 1.2863144451458508, "correct_loss_per_token": 7.5102033615112305, "incorrect_loss_per_token": 6.844470977783203, "correct_loss_uncond": -4.845697402954102, "incorrect_loss_uncond": -4.644325256347656}, "model_output": [{"sum_logits": -16.16106605529785, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.409473419189453, "logits_per_token": -8.080533027648926, "logits_per_char": -1.243158927330604, "num_chars": 13}, {"sum_logits": -15.020406723022461, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -19.866104125976562, "logits_per_token": -7.5102033615112305, "logits_per_char": -1.1554159017709584, "num_chars": 13}, {"sum_logits": -16.576580047607422, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -23.537809371948242, "logits_per_token": -4.1441450119018555, "logits_per_char": -1.1051053365071615, "num_chars": 15}, {"sum_logits": -16.617469787597656, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -21.340808868408203, "logits_per_token": -8.308734893798828, "logits_per_char": -1.5106790715997869, "num_chars": 11}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 263, "native_id": "187", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.870880126953125, "incorrect_loss_raw": 2.5357811053593955, "correct_loss_per_char": 0.6958400181361607, "incorrect_loss_per_char": 0.30559173778251364, "correct_loss_per_token": 4.870880126953125, "incorrect_loss_per_token": 2.212668001651764, "correct_loss_uncond": -7.50572395324707, "incorrect_loss_uncond": -9.520407478014628}, "model_output": [{"sum_logits": -2.8304545879364014, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.02598762512207, "logits_per_token": -2.8304545879364014, "logits_per_char": -0.3144949542151557, "num_chars": 9}, {"sum_logits": -4.870880126953125, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.376604080200195, "logits_per_token": -4.870880126953125, "logits_per_char": -0.6958400181361607, "num_chars": 7}, {"sum_logits": -2.838210105895996, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -2.838210105895996, "logits_per_char": -0.4730350176493327, "num_chars": 6}, {"sum_logits": -1.9386786222457886, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": true, "sum_logits_uncond": -12.253585815429688, "logits_per_token": -0.9693393111228943, "logits_per_char": -0.12924524148305258, "num_chars": 15}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 264, "native_id": "8-253", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 36.23849868774414, "incorrect_loss_raw": 18.170162518819172, "correct_loss_per_char": 1.24960340302566, "incorrect_loss_per_char": 0.9979504900767392, "correct_loss_per_token": 4.529812335968018, "incorrect_loss_per_token": 4.152617030673557, "correct_loss_uncond": -4.099113464355469, "incorrect_loss_uncond": -5.647848447163899}, "model_output": [{"sum_logits": -21.23919677734375, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -28.018043518066406, "logits_per_token": -5.3097991943359375, "logits_per_char": -1.0619598388671876, "num_chars": 20}, {"sum_logits": -23.65427017211914, "num_tokens": 6, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -30.667682647705078, "logits_per_token": -3.942378362019857, "logits_per_char": -1.2449615880062705, "num_chars": 19}, {"sum_logits": -36.23849868774414, "num_tokens": 8, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -40.33761215209961, "logits_per_token": -4.529812335968018, "logits_per_char": -1.24960340302566, "num_chars": 29}, {"sum_logits": -9.617020606994629, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.2056735356648765, "logits_per_char": -0.6869300433567592, "num_chars": 14}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 265, "native_id": "9-482", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.268592834472656, "incorrect_loss_raw": 13.28343137105306, "correct_loss_per_char": 0.5452113432042739, "incorrect_loss_per_char": 0.7210735493202242, "correct_loss_per_token": 2.317148208618164, "incorrect_loss_per_token": 3.320857842763265, "correct_loss_uncond": -10.745777130126953, "incorrect_loss_uncond": -6.079195022583008}, "model_output": [{"sum_logits": -12.978433609008789, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -16.955955505371094, "logits_per_token": -3.2446084022521973, "logits_per_char": -0.8111521005630493, "num_chars": 16}, {"sum_logits": -11.972862243652344, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -19.713260650634766, "logits_per_token": -2.993215560913086, "logits_per_char": -0.7042860143324908, "num_chars": 17}, {"sum_logits": -14.898998260498047, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -21.418663024902344, "logits_per_token": -3.7247495651245117, "logits_per_char": -0.6477825330651324, "num_chars": 23}, {"sum_logits": -9.268592834472656, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -20.01436996459961, "logits_per_token": -2.317148208618164, "logits_per_char": -0.5452113432042739, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 266, "native_id": "496", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.678985595703125, "incorrect_loss_raw": 8.77534818649292, "correct_loss_per_char": 0.8769994665075231, "incorrect_loss_per_char": 0.8489091996793393, "correct_loss_per_token": 3.946497599283854, "incorrect_loss_per_token": 4.38767409324646, "correct_loss_uncond": -2.368377685546875, "incorrect_loss_uncond": -7.1567966143290205}, "model_output": [{"sum_logits": -23.678985595703125, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -26.04736328125, "logits_per_token": -3.946497599283854, "logits_per_char": -0.8769994665075231, "num_chars": 27}, {"sum_logits": -10.084283828735352, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -16.489046096801758, "logits_per_token": -5.042141914367676, "logits_per_char": -1.260535478591919, "num_chars": 8}, {"sum_logits": -4.576681613922119, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.392349243164062, "logits_per_token": -2.2883408069610596, "logits_per_char": -0.5085201793246799, "num_chars": 9}, {"sum_logits": -11.665079116821289, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -16.9150390625, "logits_per_token": -5.8325395584106445, "logits_per_char": -0.7776719411214192, "num_chars": 15}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 267, "native_id": "630", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.316349029541016, "incorrect_loss_raw": 8.391539255777994, "correct_loss_per_char": 0.7560317299582742, "incorrect_loss_per_char": 0.7391582223702762, "correct_loss_per_token": 4.158174514770508, "incorrect_loss_per_token": 4.195769627888997, "correct_loss_uncond": -2.6374006271362305, "incorrect_loss_uncond": -4.021611213684082}, "model_output": [{"sum_logits": -9.010997772216797, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.464652061462402, "logits_per_token": -4.505498886108398, "logits_per_char": -0.8191816156560724, "num_chars": 11}, {"sum_logits": -7.983088493347168, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.50688648223877, "logits_per_token": -3.991544246673584, "logits_per_char": -0.8870098325941298, "num_chars": 9}, {"sum_logits": -8.316349029541016, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -10.953749656677246, "logits_per_token": -4.158174514770508, "logits_per_char": -0.7560317299582742, "num_chars": 11}, {"sum_logits": -8.18053150177002, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.267912864685059, "logits_per_token": -4.09026575088501, "logits_per_char": -0.5112832188606262, "num_chars": 16}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 268, "native_id": "9-16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.232270240783691, "incorrect_loss_raw": 8.88148307800293, "correct_loss_per_char": 0.47940540313720703, "incorrect_loss_per_char": 0.6620244843619211, "correct_loss_per_token": 2.077423413594564, "incorrect_loss_per_token": 3.2177132765452066, "correct_loss_uncond": -11.983902931213379, "incorrect_loss_uncond": -8.193916956583658}, "model_output": [{"sum_logits": -9.058576583862305, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -15.148197174072266, "logits_per_token": -4.529288291931152, "logits_per_char": -0.6470411845615932, "num_chars": 14}, {"sum_logits": -8.72860050201416, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -17.835168838500977, "logits_per_token": -2.9095335006713867, "logits_per_char": -0.872860050201416, "num_chars": 10}, {"sum_logits": -8.857272148132324, "num_tokens": 4, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -18.242834091186523, "logits_per_token": -2.214318037033081, "logits_per_char": -0.4661722183227539, "num_chars": 19}, {"sum_logits": -6.232270240783691, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -18.21617317199707, "logits_per_token": -2.077423413594564, "logits_per_char": -0.47940540313720703, "num_chars": 13}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 269, "native_id": "7-986", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.310664176940918, "incorrect_loss_raw": 15.455038070678711, "correct_loss_per_char": 0.47702213923136394, "incorrect_loss_per_char": 0.5903726843509304, "correct_loss_per_token": 2.8621328353881834, "incorrect_loss_per_token": 3.297917938232422, "correct_loss_uncond": -19.055119514465332, "incorrect_loss_uncond": -13.25869051615397}, "model_output": [{"sum_logits": -20.56194496154785, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -32.53086853027344, "logits_per_token": -4.112388992309571, "logits_per_char": -0.7615535170943649, "num_chars": 27}, {"sum_logits": -12.414619445800781, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -28.607694625854492, "logits_per_token": -3.1036548614501953, "logits_per_char": -0.591172354561942, "num_chars": 21}, {"sum_logits": -13.3885498046875, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -25.002622604370117, "logits_per_token": -2.6777099609375, "logits_per_char": -0.4183921813964844, "num_chars": 32}, {"sum_logits": -14.310664176940918, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -33.36578369140625, "logits_per_token": -2.8621328353881834, "logits_per_char": -0.47702213923136394, "num_chars": 30}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 270, "native_id": "7-787", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.233827590942383, "incorrect_loss_raw": 16.82617982228597, "correct_loss_per_char": 0.7693531036376953, "incorrect_loss_per_char": 1.026042905641871, "correct_loss_per_token": 6.411275863647461, "incorrect_loss_per_token": 5.608726607428657, "correct_loss_uncond": -8.241697311401367, "incorrect_loss_uncond": -6.533645311991374}, "model_output": [{"sum_logits": -16.588958740234375, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.45315170288086, "logits_per_token": -5.529652913411458, "logits_per_char": -0.8294479370117187, "num_chars": 20}, {"sum_logits": -20.24396514892578, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.63903045654297, "logits_per_token": -6.747988382975261, "logits_per_char": -1.4459975106375558, "num_chars": 14}, {"sum_logits": -19.233827590942383, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -27.47552490234375, "logits_per_token": -6.411275863647461, "logits_per_char": -0.7693531036376953, "num_chars": 25}, {"sum_logits": -13.645615577697754, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.987293243408203, "logits_per_token": -4.548538525899251, "logits_per_char": -0.8026832692763385, "num_chars": 17}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 271, "native_id": "9-181", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 21.57707977294922, "incorrect_loss_raw": 12.282518068949381, "correct_loss_per_char": 0.9381339031717052, "incorrect_loss_per_char": 1.1754305865967776, "correct_loss_per_token": 4.315415954589843, "incorrect_loss_per_token": 6.1412590344746905, "correct_loss_uncond": -8.383180618286133, "incorrect_loss_uncond": -4.259879430135091}, "model_output": [{"sum_logits": -21.57707977294922, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -29.96026039123535, "logits_per_token": -4.315415954589843, "logits_per_char": -0.9381339031717052, "num_chars": 23}, {"sum_logits": -10.593973159790039, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.85519790649414, "logits_per_token": -5.2969865798950195, "logits_per_char": -0.8149210122915415, "num_chars": 13}, {"sum_logits": -10.181843757629395, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.780802726745605, "logits_per_token": -5.090921878814697, "logits_per_char": -0.9256221597844904, "num_chars": 11}, {"sum_logits": -16.07173728942871, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.991191864013672, "logits_per_token": -8.035868644714355, "logits_per_char": -1.7857485877143011, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 272, "native_id": "1240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.984783172607422, "incorrect_loss_raw": 20.56912390391032, "correct_loss_per_char": 0.7054347991943359, "incorrect_loss_per_char": 0.6828919074203353, "correct_loss_per_token": 2.9980978965759277, "incorrect_loss_per_token": 3.5492527182140052, "correct_loss_uncond": -14.20273208618164, "incorrect_loss_uncond": -11.90178648630778}, "model_output": [{"sum_logits": -12.754923820495605, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -26.64212417602539, "logits_per_token": -3.1887309551239014, "logits_per_char": -0.5797692645679821, "num_chars": 22}, {"sum_logits": -23.984783172607422, "num_tokens": 8, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -38.18751525878906, "logits_per_token": -2.9980978965759277, "logits_per_char": -0.7054347991943359, "num_chars": 34}, {"sum_logits": -19.564455032348633, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -30.949981689453125, "logits_per_token": -3.260742505391439, "logits_per_char": -0.674636380425815, "num_chars": 29}, {"sum_logits": -29.38799285888672, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -39.82062530517578, "logits_per_token": -4.1982846941266745, "logits_per_char": -0.7942700772672087, "num_chars": 37}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 273, "native_id": "474", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 27.4263916015625, "incorrect_loss_raw": 26.301516850789387, "correct_loss_per_char": 0.54852783203125, "incorrect_loss_per_char": 0.6296363685152652, "correct_loss_per_token": 2.74263916015625, "incorrect_loss_per_token": 3.6349013646443686, "correct_loss_uncond": -5.799282073974609, "incorrect_loss_uncond": -4.306535085042317}, "model_output": [{"sum_logits": -23.28011131286621, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -26.73708152770996, "logits_per_token": -3.8800185521443686, "logits_per_char": -0.5678075929967369, "num_chars": 41}, {"sum_logits": -33.69081497192383, "num_tokens": 10, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -40.76321029663086, "logits_per_token": -3.369081497192383, "logits_per_char": -0.6356757541872421, "num_chars": 53}, {"sum_logits": -21.933624267578125, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.323863983154297, "logits_per_token": -3.655604044596354, "logits_per_char": -0.6854257583618164, "num_chars": 32}, {"sum_logits": -27.4263916015625, "num_tokens": 10, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -33.22567367553711, "logits_per_token": -2.74263916015625, "logits_per_char": -0.54852783203125, "num_chars": 50}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 274, "native_id": "1274", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.706547260284424, "incorrect_loss_raw": 9.469989776611328, "correct_loss_per_char": 0.3356792506049661, "incorrect_loss_per_char": 0.7502216144756123, "correct_loss_per_token": 1.9021824200948079, "incorrect_loss_per_token": 3.156663258870443, "correct_loss_uncond": -12.502984523773193, "incorrect_loss_uncond": -11.96773592631022}, "model_output": [{"sum_logits": -12.2613525390625, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -22.79598045349121, "logits_per_token": -4.087117513020833, "logits_per_char": -0.8758108956473214, "num_chars": 14}, {"sum_logits": -6.663951873779297, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.84447479248047, "logits_per_token": -2.2213172912597656, "logits_per_char": -0.5126116825984075, "num_chars": 13}, {"sum_logits": -9.484664916992188, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -21.67272186279297, "logits_per_token": -3.161554972330729, "logits_per_char": -0.8622422651811079, "num_chars": 11}, {"sum_logits": -5.706547260284424, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -18.209531784057617, "logits_per_token": -1.9021824200948079, "logits_per_char": -0.3356792506049661, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 275, "native_id": "1531", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 18.574386596679688, "incorrect_loss_raw": 15.397964477539062, "correct_loss_per_char": 0.5306967599051339, "incorrect_loss_per_char": 0.4299373945632538, "correct_loss_per_token": 3.0957310994466147, "incorrect_loss_per_token": 2.5717533429463706, "correct_loss_uncond": -17.694190979003906, "incorrect_loss_uncond": -16.133129119873047}, "model_output": [{"sum_logits": -17.470481872558594, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -34.488914489746094, "logits_per_token": -2.9117469787597656, "logits_per_char": -0.49915662493024554, "num_chars": 35}, {"sum_logits": -16.174488067626953, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -30.085906982421875, "logits_per_token": -3.2348976135253906, "logits_per_char": -0.5054527521133423, "num_chars": 32}, {"sum_logits": -12.54892349243164, "num_tokens": 8, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -30.01845932006836, "logits_per_token": -1.568615436553955, "logits_per_char": -0.28520280664617365, "num_chars": 44}, {"sum_logits": -18.574386596679688, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.268577575683594, "logits_per_token": -3.0957310994466147, "logits_per_char": -0.5306967599051339, "num_chars": 35}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 276, "native_id": "8-321", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.437628746032715, "incorrect_loss_raw": 11.801692326863607, "correct_loss_per_char": 0.6761480678211559, "incorrect_loss_per_char": 1.5082727008395727, "correct_loss_per_token": 7.437628746032715, "incorrect_loss_per_token": 11.801692326863607, "correct_loss_uncond": -6.007597923278809, "incorrect_loss_uncond": -0.14534950256347656}, "model_output": [{"sum_logits": -14.701972961425781, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -12.661859512329102, "logits_per_token": -14.701972961425781, "logits_per_char": -1.8377466201782227, "num_chars": 8}, {"sum_logits": -7.437628746032715, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.445226669311523, "logits_per_token": -7.437628746032715, "logits_per_char": -0.6761480678211559, "num_chars": 11}, {"sum_logits": -6.961078643798828, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -6.961078643798828, "logits_per_char": -1.1601797739664714, "num_chars": 6}, {"sum_logits": -13.742025375366211, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.290273666381836, "logits_per_token": -13.742025375366211, "logits_per_char": -1.5268917083740234, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 277, "native_id": "1321", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 33.549156188964844, "incorrect_loss_raw": 23.71978696187337, "correct_loss_per_char": 0.6989407539367676, "incorrect_loss_per_char": 0.6083050492745165, "correct_loss_per_token": 3.7276840209960938, "incorrect_loss_per_token": 2.982860527341328, "correct_loss_uncond": -19.658504486083984, "incorrect_loss_uncond": -13.435300827026367}, "model_output": [{"sum_logits": -22.864288330078125, "num_tokens": 9, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -34.515472412109375, "logits_per_token": -2.5404764811197915, "logits_per_char": -0.5196429165926847, "num_chars": 44}, {"sum_logits": -27.506694793701172, "num_tokens": 8, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -39.01850509643555, "logits_per_token": -3.4383368492126465, "logits_per_char": -0.7434241836135452, "num_chars": 37}, {"sum_logits": -33.549156188964844, "num_tokens": 9, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -53.20766067504883, "logits_per_token": -3.7276840209960938, "logits_per_char": -0.6989407539367676, "num_chars": 48}, {"sum_logits": -20.78837776184082, "num_tokens": 7, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -37.9312858581543, "logits_per_token": -2.9697682516915456, "logits_per_char": -0.5618480476173194, "num_chars": 37}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 278, "native_id": "9-51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.52505111694336, "incorrect_loss_raw": 13.634646733601889, "correct_loss_per_char": 0.7513917287190756, "incorrect_loss_per_char": 0.9282639882503411, "correct_loss_per_token": 4.508350372314453, "incorrect_loss_per_token": 5.2911750475565595, "correct_loss_uncond": -6.204042434692383, "incorrect_loss_uncond": -3.4436299006144204}, "model_output": [{"sum_logits": -13.009785652160645, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -18.665279388427734, "logits_per_token": -4.336595217386882, "logits_per_char": -1.0007527424738958, "num_chars": 13}, {"sum_logits": -14.460884094238281, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -17.333763122558594, "logits_per_token": -4.820294698079427, "logits_per_char": -1.1123756995567908, "num_chars": 13}, {"sum_logits": -13.433270454406738, "num_tokens": 2, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -15.235787391662598, "logits_per_token": -6.716635227203369, "logits_per_char": -0.6716635227203369, "num_chars": 20}, {"sum_logits": -13.52505111694336, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -19.729093551635742, "logits_per_token": -4.508350372314453, "logits_per_char": -0.7513917287190756, "num_chars": 18}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 279, "native_id": "7-685", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.316373825073242, "incorrect_loss_raw": 9.595874627431234, "correct_loss_per_char": 1.5907082027859158, "incorrect_loss_per_char": 1.1467443373468187, "correct_loss_per_token": 7.158186912536621, "incorrect_loss_per_token": 5.29169225692749, "correct_loss_uncond": -8.883687973022461, "incorrect_loss_uncond": -6.924708843231201}, "model_output": [{"sum_logits": -8.89802360534668, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.101316452026367, "logits_per_token": -4.44901180267334, "logits_per_char": -1.4830039342244465, "num_chars": 6}, {"sum_logits": -14.316373825073242, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -23.200061798095703, "logits_per_token": -7.158186912536621, "logits_per_char": -1.5907082027859158, "num_chars": 9}, {"sum_logits": -12.695302963256836, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -21.049514770507812, "logits_per_token": -4.231767654418945, "logits_per_char": -1.0579419136047363, "num_chars": 12}, {"sum_logits": -7.1942973136901855, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.410919189453125, "logits_per_token": -7.1942973136901855, "logits_per_char": -0.8992871642112732, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 280, "native_id": "7-59", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.4390869140625, "incorrect_loss_raw": 11.278111457824707, "correct_loss_per_char": 0.8099619547526041, "incorrect_loss_per_char": 0.7748424495969499, "correct_loss_per_token": 6.479695638020833, "incorrect_loss_per_token": 5.6390557289123535, "correct_loss_uncond": -10.550146102905273, "incorrect_loss_uncond": -5.519593874613444}, "model_output": [{"sum_logits": -19.4390869140625, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -29.989233016967773, "logits_per_token": -6.479695638020833, "logits_per_char": -0.8099619547526041, "num_chars": 24}, {"sum_logits": -12.46048641204834, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.2314395904541, "logits_per_token": -6.23024320602417, "logits_per_char": -0.8900347437177386, "num_chars": 14}, {"sum_logits": -10.327611923217773, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.506080627441406, "logits_per_token": -5.163805961608887, "logits_per_char": -0.6454757452011108, "num_chars": 16}, {"sum_logits": -11.046236038208008, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.655595779418945, "logits_per_token": -5.523118019104004, "logits_per_char": -0.7890168598720005, "num_chars": 14}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 281, "native_id": "7-270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 34.28822326660156, "incorrect_loss_raw": 24.413230895996094, "correct_loss_per_char": 0.857205581665039, "incorrect_loss_per_char": 1.0377159741341293, "correct_loss_per_token": 5.714703877766927, "incorrect_loss_per_token": 4.946274375915528, "correct_loss_uncond": -11.578815460205078, "incorrect_loss_uncond": -4.28422737121582}, "model_output": [{"sum_logits": -21.31930923461914, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -24.279172897338867, "logits_per_token": -5.329827308654785, "logits_per_char": -1.122068907085218, "num_chars": 19}, {"sum_logits": -25.667957305908203, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -31.90033531188965, "logits_per_token": -5.1335914611816404, "logits_per_char": -1.1159981437351392, "num_chars": 23}, {"sum_logits": -26.252426147460938, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -29.912866592407227, "logits_per_token": -4.375404357910156, "logits_per_char": -0.8750808715820313, "num_chars": 30}, {"sum_logits": -34.28822326660156, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -45.86703872680664, "logits_per_token": -5.714703877766927, "logits_per_char": -0.857205581665039, "num_chars": 40}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 282, "native_id": "7-736", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.48173713684082, "incorrect_loss_raw": 15.229385375976562, "correct_loss_per_char": 1.0926085710525513, "incorrect_loss_per_char": 0.9281050584934376, "correct_loss_per_token": 5.827245712280273, "incorrect_loss_per_token": 5.0764617919921875, "correct_loss_uncond": -3.2901668548583984, "incorrect_loss_uncond": -5.952016830444336}, "model_output": [{"sum_logits": -11.044702529907227, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.09210968017578, "logits_per_token": -3.6815675099690757, "logits_per_char": -0.7363135019938151, "num_chars": 15}, {"sum_logits": -16.878841400146484, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.040327072143555, "logits_per_token": -5.626280466715495, "logits_per_char": -0.9377134111192491, "num_chars": 18}, {"sum_logits": -17.48173713684082, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -20.77190399169922, "logits_per_token": -5.827245712280273, "logits_per_char": -1.0926085710525513, "num_chars": 16}, {"sum_logits": -17.764612197875977, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -24.41176986694336, "logits_per_token": -5.921537399291992, "logits_per_char": -1.1102882623672485, "num_chars": 16}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 283, "native_id": "8-186", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.406036376953125, "incorrect_loss_raw": 23.073116302490234, "correct_loss_per_char": 0.6802012125651041, "incorrect_loss_per_char": 0.8446911016259033, "correct_loss_per_token": 5.101509094238281, "incorrect_loss_per_token": 4.504997180757068, "correct_loss_uncond": -7.980140686035156, "incorrect_loss_uncond": -6.2434431711832685}, "model_output": [{"sum_logits": -22.38235092163086, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -32.952457427978516, "logits_per_token": -4.476470184326172, "logits_per_char": -0.7993696757725307, "num_chars": 28}, {"sum_logits": -24.926795959472656, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -28.943998336791992, "logits_per_token": -3.5609708513532365, "logits_per_char": -0.923214665165654, "num_chars": 27}, {"sum_logits": -21.910202026367188, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -26.05322265625, "logits_per_token": -5.477550506591797, "logits_per_char": -0.8114889639395254, "num_chars": 27}, {"sum_logits": -20.406036376953125, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -28.38617706298828, "logits_per_token": -5.101509094238281, "logits_per_char": -0.6802012125651041, "num_chars": 30}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 284, "native_id": "224", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.450983047485352, "incorrect_loss_raw": 8.350023905436197, "correct_loss_per_char": 0.3794695910285501, "incorrect_loss_per_char": 0.7470777184813172, "correct_loss_per_token": 2.150327682495117, "incorrect_loss_per_token": 2.7833413018120665, "correct_loss_uncond": -15.662115097045898, "incorrect_loss_uncond": -9.01870600382487}, "model_output": [{"sum_logits": -8.358073234558105, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.552783966064453, "logits_per_token": -2.7860244115193686, "logits_per_char": -0.8358073234558105, "num_chars": 10}, {"sum_logits": -6.450983047485352, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -22.11309814453125, "logits_per_token": -2.150327682495117, "logits_per_char": -0.3794695910285501, "num_chars": 17}, {"sum_logits": -8.681955337524414, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.747161865234375, "logits_per_token": -2.893985112508138, "logits_per_char": -0.789268667047674, "num_chars": 11}, {"sum_logits": -8.010043144226074, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.806243896484375, "logits_per_token": -2.6700143814086914, "logits_per_char": -0.6161571649404672, "num_chars": 13}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 285, "native_id": "8-206", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.339397430419922, "incorrect_loss_raw": 13.855299631754557, "correct_loss_per_char": 0.9277998882791271, "incorrect_loss_per_char": 0.5219959941987069, "correct_loss_per_token": 5.3348493576049805, "incorrect_loss_per_token": 2.840778711107042, "correct_loss_uncond": 1.1350555419921875, "incorrect_loss_uncond": -10.185721715291342}, "model_output": [{"sum_logits": -21.339397430419922, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -20.204341888427734, "logits_per_token": -5.3348493576049805, "logits_per_char": -0.9277998882791271, "num_chars": 23}, {"sum_logits": -14.25644302368164, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -26.0737247467041, "logits_per_token": -3.56411075592041, "logits_per_char": -0.5702577209472657, "num_chars": 25}, {"sum_logits": -12.199481964111328, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.78729248046875, "logits_per_token": -2.4398963928222654, "logits_per_char": -0.5083117485046387, "num_chars": 24}, {"sum_logits": -15.109973907470703, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -26.262046813964844, "logits_per_token": -2.5183289845784507, "logits_per_char": -0.48741851314421625, "num_chars": 31}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 286, "native_id": "8-190", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.822832107543945, "incorrect_loss_raw": 14.72757625579834, "correct_loss_per_char": 1.438439282503995, "incorrect_loss_per_char": 0.9121501226273794, "correct_loss_per_token": 5.274277369181315, "incorrect_loss_per_token": 5.9266402986314555, "correct_loss_uncond": -10.452688217163086, "incorrect_loss_uncond": -8.626876513163248}, "model_output": [{"sum_logits": -11.914203643798828, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -22.899465560913086, "logits_per_token": -3.9714012145996094, "logits_per_char": -0.8510145459856305, "num_chars": 14}, {"sum_logits": -15.822832107543945, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -26.27552032470703, "logits_per_token": -5.274277369181315, "logits_per_char": -1.438439282503995, "num_chars": 11}, {"sum_logits": -13.95445728302002, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -24.80964469909668, "logits_per_token": -4.651485761006673, "logits_per_char": -0.6644979658580962, "num_chars": 21}, {"sum_logits": -18.314067840576172, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -22.354248046875, "logits_per_token": -9.157033920288086, "logits_per_char": -1.2209378560384114, "num_chars": 15}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 287, "native_id": "7-334", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 22.44152069091797, "incorrect_loss_raw": 24.863470713297527, "correct_loss_per_char": 0.6600447262034697, "incorrect_loss_per_char": 0.7932843032520487, "correct_loss_per_token": 4.488304138183594, "incorrect_loss_per_token": 4.972694142659505, "correct_loss_uncond": -11.31634521484375, "incorrect_loss_uncond": -7.894967397054036}, "model_output": [{"sum_logits": -29.161317825317383, "num_tokens": 5, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -33.65779113769531, "logits_per_token": -5.832263565063476, "logits_per_char": -0.9406876717844317, "num_chars": 31}, {"sum_logits": -22.44152069091797, "num_tokens": 5, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -33.75786590576172, "logits_per_token": -4.488304138183594, "logits_per_char": -0.6600447262034697, "num_chars": 34}, {"sum_logits": -19.766756057739258, "num_tokens": 5, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -32.31047058105469, "logits_per_token": -3.9533512115478517, "logits_per_char": -0.7059555734906878, "num_chars": 28}, {"sum_logits": -25.662338256835938, "num_tokens": 5, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -32.30705261230469, "logits_per_token": -5.132467651367188, "logits_per_char": -0.7332096644810268, "num_chars": 35}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 288, "native_id": "9-853", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.14782428741455, "incorrect_loss_raw": 8.101635932922363, "correct_loss_per_char": 0.5092390179634094, "incorrect_loss_per_char": 0.8193105379740397, "correct_loss_per_token": 2.7159414291381836, "incorrect_loss_per_token": 4.402665562099881, "correct_loss_uncond": -17.483548164367676, "incorrect_loss_uncond": -12.07070541381836}, "model_output": [{"sum_logits": -10.19683837890625, "num_tokens": 3, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -27.857410430908203, "logits_per_token": -3.3989461263020835, "logits_per_char": -0.6797892252604166, "num_chars": 15}, {"sum_logits": -8.14782428741455, "num_tokens": 3, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -25.631372451782227, "logits_per_token": -2.7159414291381836, "logits_per_char": -0.5092390179634094, "num_chars": 16}, {"sum_logits": -8.598037719726562, "num_tokens": 2, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -19.193553924560547, "logits_per_token": -4.299018859863281, "logits_per_char": -0.8598037719726562, "num_chars": 10}, {"sum_logits": -5.510031700134277, "num_tokens": 1, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -13.466059684753418, "logits_per_token": -5.510031700134277, "logits_per_char": -0.9183386166890463, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 289, "native_id": "8-367", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 24.178152084350586, "incorrect_loss_raw": 21.372870763142902, "correct_loss_per_char": 0.5372922685411241, "incorrect_loss_per_char": 0.7514910588319275, "correct_loss_per_token": 4.835630416870117, "incorrect_loss_per_token": 4.280229886372884, "correct_loss_uncond": -7.943777084350586, "incorrect_loss_uncond": -8.824260711669922}, "model_output": [{"sum_logits": -24.178152084350586, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -32.12192916870117, "logits_per_token": -4.835630416870117, "logits_per_char": -0.5372922685411241, "num_chars": 45}, {"sum_logits": -15.766706466674805, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -28.953149795532227, "logits_per_token": -3.153341293334961, "logits_per_char": -0.5630966595241002, "num_chars": 28}, {"sum_logits": -28.807537078857422, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -35.691375732421875, "logits_per_token": -4.80125617980957, "logits_per_char": -0.9933633475468077, "num_chars": 29}, {"sum_logits": -19.544368743896484, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -25.946868896484375, "logits_per_token": -4.886092185974121, "logits_per_char": -0.6980131694248745, "num_chars": 28}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 290, "native_id": "1047", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 31.86322784423828, "incorrect_loss_raw": 27.14244206746419, "correct_loss_per_char": 0.817005842159956, "incorrect_loss_per_char": 0.844351012430509, "correct_loss_per_token": 3.540358649359809, "incorrect_loss_per_token": 4.5964167337568975, "correct_loss_uncond": -19.015365600585938, "incorrect_loss_uncond": -10.726698557535807}, "model_output": [{"sum_logits": -31.86322784423828, "num_tokens": 9, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -50.87859344482422, "logits_per_token": -3.540358649359809, "logits_per_char": -0.817005842159956, "num_chars": 39}, {"sum_logits": -29.502212524414062, "num_tokens": 7, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -42.18058776855469, "logits_per_token": -4.214601789202009, "logits_per_char": -0.7763740138003701, "num_chars": 38}, {"sum_logits": -24.311229705810547, "num_tokens": 6, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -35.83039855957031, "logits_per_token": -4.051871617635091, "logits_per_char": -0.6946065630231585, "num_chars": 35}, {"sum_logits": -27.61388397216797, "num_tokens": 5, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -35.596435546875, "logits_per_token": -5.522776794433594, "logits_per_char": -1.0620724604679987, "num_chars": 26}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 291, "native_id": "9-454", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.164108276367188, "incorrect_loss_raw": 15.805128415425619, "correct_loss_per_char": 1.5948726109095983, "incorrect_loss_per_char": 2.05981259118943, "correct_loss_per_token": 5.582054138183594, "incorrect_loss_per_token": 9.352318975660536, "correct_loss_uncond": -4.650638580322266, "incorrect_loss_uncond": -0.5737552642822266}, "model_output": [{"sum_logits": -16.32686996459961, "num_tokens": 3, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -17.476146697998047, "logits_per_token": -5.44228998819987, "logits_per_char": -2.040858745574951, "num_chars": 8}, {"sum_logits": -14.14081859588623, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.156607627868652, "logits_per_token": -14.14081859588623, "logits_per_char": -2.0201169422694614, "num_chars": 7}, {"sum_logits": -11.164108276367188, "num_tokens": 2, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -15.814746856689453, "logits_per_token": -5.582054138183594, "logits_per_char": -1.5948726109095983, "num_chars": 7}, {"sum_logits": -16.947696685791016, "num_tokens": 2, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -18.503896713256836, "logits_per_token": -8.473848342895508, "logits_per_char": -2.118462085723877, "num_chars": 8}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 292, "native_id": "1572", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.496731758117676, "incorrect_loss_raw": 10.884774525960287, "correct_loss_per_char": 1.3566759654453822, "incorrect_loss_per_char": 1.9798785068370677, "correct_loss_per_token": 4.748365879058838, "incorrect_loss_per_token": 10.884774525960287, "correct_loss_uncond": -4.2850751876831055, "incorrect_loss_uncond": -1.5832064946492512}, "model_output": [{"sum_logits": -11.996113777160645, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -13.034186363220215, "logits_per_token": -11.996113777160645, "logits_per_char": -1.3329015307956271, "num_chars": 9}, {"sum_logits": -9.501840591430664, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -11.58763313293457, "logits_per_token": -9.501840591430664, "logits_per_char": -2.375460147857666, "num_chars": 4}, {"sum_logits": -9.496731758117676, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.781806945800781, "logits_per_token": -4.748365879058838, "logits_per_char": -1.3566759654453822, "num_chars": 7}, {"sum_logits": -11.15636920928955, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.782123565673828, "logits_per_token": -11.15636920928955, "logits_per_char": -2.2312738418579103, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 293, "native_id": "8-373", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.193950653076172, "incorrect_loss_raw": 24.779017130533855, "correct_loss_per_char": 0.9123109579086304, "incorrect_loss_per_char": 0.8415469929090418, "correct_loss_per_token": 4.170564379010882, "incorrect_loss_per_token": 3.932381693522135, "correct_loss_uncond": -6.812450408935547, "incorrect_loss_uncond": -6.953815460205078}, "model_output": [{"sum_logits": -29.193950653076172, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -36.00640106201172, "logits_per_token": -4.170564379010882, "logits_per_char": -0.9123109579086304, "num_chars": 32}, {"sum_logits": -26.224620819091797, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -32.161415100097656, "logits_per_token": -3.7463744027273997, "logits_per_char": -0.8195194005966187, "num_chars": 32}, {"sum_logits": -27.505020141601562, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -34.167213439941406, "logits_per_token": -3.929288591657366, "logits_per_char": -0.7238163195158306, "num_chars": 38}, {"sum_logits": -20.607410430908203, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -28.869869232177734, "logits_per_token": -4.121482086181641, "logits_per_char": -0.9813052586146763, "num_chars": 21}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 294, "native_id": "9-772", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.9326276779174805, "incorrect_loss_raw": 7.738879998524983, "correct_loss_per_char": 0.9887712796529134, "incorrect_loss_per_char": 1.0120536571457273, "correct_loss_per_token": 5.9326276779174805, "incorrect_loss_per_token": 6.737558921178182, "correct_loss_uncond": -3.956364631652832, "incorrect_loss_uncond": -5.403567473093669}, "model_output": [{"sum_logits": -5.9326276779174805, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -5.9326276779174805, "logits_per_char": -0.9887712796529134, "num_chars": 6}, {"sum_logits": -7.5085344314575195, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -13.685901641845703, "logits_per_token": -7.5085344314575195, "logits_per_char": -1.0726477759225028, "num_chars": 7}, {"sum_logits": -6.0079264640808105, "num_tokens": 2, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.702047348022461, "logits_per_token": -3.0039632320404053, "logits_per_char": -0.7509908080101013, "num_chars": 8}, {"sum_logits": -9.700179100036621, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -14.039393424987793, "logits_per_token": -9.700179100036621, "logits_per_char": -1.2125223875045776, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 295, "native_id": "1852", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.0111026763916, "incorrect_loss_raw": 29.612101236979168, "correct_loss_per_char": 0.5760940204967152, "incorrect_loss_per_char": 0.7779259519005612, "correct_loss_per_token": 3.8022205352783205, "incorrect_loss_per_token": 5.0060636361440025, "correct_loss_uncond": -14.755075454711914, "incorrect_loss_uncond": -9.817923227945963}, "model_output": [{"sum_logits": -19.0111026763916, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -33.766178131103516, "logits_per_token": -3.8022205352783205, "logits_per_char": -0.5760940204967152, "num_chars": 33}, {"sum_logits": -22.171388626098633, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -34.51598358154297, "logits_per_token": -3.6952314376831055, "logits_per_char": -0.5992267196242874, "num_chars": 37}, {"sum_logits": -39.864601135253906, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -39.897010803222656, "logits_per_token": -7.972920227050781, "logits_per_char": -1.1389886038643973, "num_chars": 35}, {"sum_logits": -26.80031394958496, "num_tokens": 8, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -43.877079010009766, "logits_per_token": -3.35003924369812, "logits_per_char": -0.5955625322129992, "num_chars": 45}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 296, "native_id": "9-1090", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.7439684867858887, "incorrect_loss_raw": 4.73119592666626, "correct_loss_per_char": 1.3719842433929443, "incorrect_loss_per_char": 0.755211983786689, "correct_loss_per_token": 2.7439684867858887, "incorrect_loss_per_token": 4.73119592666626, "correct_loss_uncond": -3.909578800201416, "incorrect_loss_uncond": -6.892921606699626}, "model_output": [{"sum_logits": -5.37277889251709, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.676148414611816, "logits_per_token": -5.37277889251709, "logits_per_char": -1.0745557785034179, "num_chars": 5}, {"sum_logits": -4.634989261627197, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.634989261627197, "logits_per_char": -0.7724982102711996, "num_chars": 6}, {"sum_logits": -4.185819625854492, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.307211875915527, "logits_per_token": -4.185819625854492, "logits_per_char": -0.4185819625854492, "num_chars": 10}, {"sum_logits": -2.7439684867858887, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -6.653547286987305, "logits_per_token": -2.7439684867858887, "logits_per_char": -1.3719842433929443, "num_chars": 2}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 297, "native_id": "7-769", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.770288467407227, "incorrect_loss_raw": 10.054702123006185, "correct_loss_per_char": 0.9846858978271484, "incorrect_loss_per_char": 0.8695550242429056, "correct_loss_per_token": 4.923429489135742, "incorrect_loss_per_token": 5.027351061503093, "correct_loss_uncond": -11.36839485168457, "incorrect_loss_uncond": -10.409710566202799}, "model_output": [{"sum_logits": -7.881807327270508, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.66693878173828, "logits_per_token": -3.940903663635254, "logits_per_char": -0.8757563696967231, "num_chars": 9}, {"sum_logits": -10.411762237548828, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -21.4351806640625, "logits_per_token": -5.205881118774414, "logits_per_char": -0.7436973026820591, "num_chars": 14}, {"sum_logits": -14.770288467407227, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -26.138683319091797, "logits_per_token": -4.923429489135742, "logits_per_char": -0.9846858978271484, "num_chars": 15}, {"sum_logits": -11.870536804199219, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -23.291118621826172, "logits_per_token": -5.935268402099609, "logits_per_char": -0.9892114003499349, "num_chars": 12}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 298, "native_id": "9-478", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.174920558929443, "incorrect_loss_raw": 9.91962973276774, "correct_loss_per_char": 0.4638800621032715, "incorrect_loss_per_char": 1.1929771408202157, "correct_loss_per_token": 4.174920558929443, "incorrect_loss_per_token": 9.91962973276774, "correct_loss_uncond": -9.57889986038208, "incorrect_loss_uncond": -4.063622792561849}, "model_output": [{"sum_logits": -9.159311294555664, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -12.896183967590332, "logits_per_token": -9.159311294555664, "logits_per_char": -1.3084730420793806, "num_chars": 7}, {"sum_logits": -11.295286178588867, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -15.434308052062988, "logits_per_token": -11.295286178588867, "logits_per_char": -0.9412738482157389, "num_chars": 12}, {"sum_logits": -4.174920558929443, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.753820419311523, "logits_per_token": -4.174920558929443, "logits_per_char": -0.4638800621032715, "num_chars": 9}, {"sum_logits": -9.304291725158691, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.61926555633545, "logits_per_token": -9.304291725158691, "logits_per_char": -1.3291845321655273, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 299, "native_id": "448", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.480148315429688, "incorrect_loss_raw": 11.603748639424643, "correct_loss_per_char": 1.9257354736328125, "incorrect_loss_per_char": 1.8331814720517112, "correct_loss_per_token": 13.480148315429688, "incorrect_loss_per_token": 9.38617738087972, "correct_loss_uncond": -1.696497917175293, "incorrect_loss_uncond": -3.1543687184651694}, "model_output": [{"sum_logits": -13.480148315429688, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.17664623260498, "logits_per_token": -13.480148315429688, "logits_per_char": -1.9257354736328125, "num_chars": 7}, {"sum_logits": -13.305427551269531, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -15.068635940551758, "logits_per_token": -6.652713775634766, "logits_per_char": -2.217571258544922, "num_chars": 6}, {"sum_logits": -8.807962417602539, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.189266204833984, "logits_per_token": -8.807962417602539, "logits_per_char": -1.4679937362670898, "num_chars": 6}, {"sum_logits": -12.697855949401855, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.016449928283691, "logits_per_token": -12.697855949401855, "logits_per_char": -1.8139794213431222, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 300, "native_id": "7-417", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.972719192504883, "incorrect_loss_raw": 12.234704971313477, "correct_loss_per_char": 0.3143536417107833, "incorrect_loss_per_char": 0.6790491287730974, "correct_loss_per_token": 1.9909063975016277, "incorrect_loss_per_token": 4.078234990437825, "correct_loss_uncond": -11.31396484375, "incorrect_loss_uncond": -9.07795524597168}, "model_output": [{"sum_logits": -13.992072105407715, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -23.623401641845703, "logits_per_token": -4.664024035135905, "logits_per_char": -0.8230630650239832, "num_chars": 17}, {"sum_logits": -5.972719192504883, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -17.286684036254883, "logits_per_token": -1.9909063975016277, "logits_per_char": -0.3143536417107833, "num_chars": 19}, {"sum_logits": -16.311975479125977, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -21.87863540649414, "logits_per_token": -5.437325159708659, "logits_per_char": -0.8585250252171567, "num_chars": 19}, {"sum_logits": -6.400067329406738, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.435943603515625, "logits_per_token": -2.1333557764689126, "logits_per_char": -0.3555592960781521, "num_chars": 18}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 301, "native_id": "7-108", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.335927963256836, "incorrect_loss_raw": 15.414790153503418, "correct_loss_per_char": 1.2084954977035522, "incorrect_loss_per_char": 1.3840750157575068, "correct_loss_per_token": 6.445309321085612, "incorrect_loss_per_token": 7.089497990078396, "correct_loss_uncond": -5.189428329467773, "incorrect_loss_uncond": -4.046395301818848}, "model_output": [{"sum_logits": -18.13422393798828, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -22.848182678222656, "logits_per_token": -9.06711196899414, "logits_per_char": -1.813422393798828, "num_chars": 10}, {"sum_logits": -16.987998962402344, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -22.767066955566406, "logits_per_token": -8.493999481201172, "logits_per_char": -1.5443635420365767, "num_chars": 11}, {"sum_logits": -11.122147560119629, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.7073825200398765, "logits_per_char": -0.7944391114371163, "num_chars": 14}, {"sum_logits": -19.335927963256836, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -24.52535629272461, "logits_per_token": -6.445309321085612, "logits_per_char": -1.2084954977035522, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 302, "native_id": "1506", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 22.61078643798828, "incorrect_loss_raw": 21.9329039255778, "correct_loss_per_char": 0.6111023361618454, "incorrect_loss_per_char": 0.6664038447652887, "correct_loss_per_token": 3.7684644063313804, "incorrect_loss_per_token": 3.6459116103157165, "correct_loss_uncond": -10.600212097167969, "incorrect_loss_uncond": -12.529194513956705}, "model_output": [{"sum_logits": -26.640012741088867, "num_tokens": 7, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -33.23421859741211, "logits_per_token": -3.805716105869838, "logits_per_char": -0.6830772497715094, "num_chars": 39}, {"sum_logits": -20.99163246154785, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -37.3258056640625, "logits_per_token": -3.498605410257975, "logits_per_char": -0.6174009547514074, "num_chars": 34}, {"sum_logits": -18.16706657409668, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -32.826271057128906, "logits_per_token": -3.633413314819336, "logits_per_char": -0.6987333297729492, "num_chars": 26}, {"sum_logits": -22.61078643798828, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -33.21099853515625, "logits_per_token": -3.7684644063313804, "logits_per_char": -0.6111023361618454, "num_chars": 37}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 303, "native_id": "1712", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.50191593170166, "incorrect_loss_raw": 11.992396672566732, "correct_loss_per_char": 1.5002737045288086, "incorrect_loss_per_char": 1.4529783372525815, "correct_loss_per_token": 3.5006386439005532, "incorrect_loss_per_token": 5.996198336283366, "correct_loss_uncond": -7.46457576751709, "incorrect_loss_uncond": -3.465979894002279}, "model_output": [{"sum_logits": -10.50191593170166, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.96649169921875, "logits_per_token": -3.5006386439005532, "logits_per_char": -1.5002737045288086, "num_chars": 7}, {"sum_logits": -9.95138931274414, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -16.365436553955078, "logits_per_token": -4.97569465637207, "logits_per_char": -1.105709923638238, "num_chars": 9}, {"sum_logits": -13.599706649780273, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -13.8656005859375, "logits_per_token": -6.799853324890137, "logits_per_char": -1.6999633312225342, "num_chars": 8}, {"sum_logits": -12.426094055175781, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -16.144092559814453, "logits_per_token": -6.213047027587891, "logits_per_char": -1.5532617568969727, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 304, "native_id": "8-312", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.17551040649414, "incorrect_loss_raw": 25.37745475769043, "correct_loss_per_char": 1.065306133694119, "incorrect_loss_per_char": 1.082832523123928, "correct_loss_per_token": 4.793877601623535, "incorrect_loss_per_token": 5.4302387555440275, "correct_loss_uncond": -6.433233261108398, "incorrect_loss_uncond": -4.639739990234375}, "model_output": [{"sum_logits": -36.843650817871094, "num_tokens": 5, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -39.43600082397461, "logits_per_token": -7.368730163574218, "logits_per_char": -1.3158446720668249, "num_chars": 28}, {"sum_logits": -21.284868240356445, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -25.785579681396484, "logits_per_token": -5.321217060089111, "logits_per_char": -1.1824926800198026, "num_chars": 18}, {"sum_logits": -19.17551040649414, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -25.60874366760254, "logits_per_token": -4.793877601623535, "logits_per_char": -1.065306133694119, "num_chars": 18}, {"sum_logits": -18.00384521484375, "num_tokens": 5, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -24.83000373840332, "logits_per_token": -3.60076904296875, "logits_per_char": -0.7501602172851562, "num_chars": 24}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 305, "native_id": "9-776", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.975357055664062, "incorrect_loss_raw": 11.551743825276693, "correct_loss_per_char": 1.219484117296007, "incorrect_loss_per_char": 0.9310197321023432, "correct_loss_per_token": 5.487678527832031, "incorrect_loss_per_token": 5.775871912638347, "correct_loss_uncond": -12.11514663696289, "incorrect_loss_uncond": -11.47585423787435}, "model_output": [{"sum_logits": -10.975357055664062, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -23.090503692626953, "logits_per_token": -5.487678527832031, "logits_per_char": -1.219484117296007, "num_chars": 9}, {"sum_logits": -13.708786964416504, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.753223419189453, "logits_per_token": -6.854393482208252, "logits_per_char": -0.9791990688868931, "num_chars": 14}, {"sum_logits": -11.927797317504883, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.65656852722168, "logits_per_token": -5.963898658752441, "logits_per_char": -0.9939831097920736, "num_chars": 12}, {"sum_logits": -9.018647193908691, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -21.673002243041992, "logits_per_token": -4.509323596954346, "logits_per_char": -0.8198770176280629, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 306, "native_id": "8-279", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.002727508544922, "incorrect_loss_raw": 20.187916437784832, "correct_loss_per_char": 0.6429545538766044, "incorrect_loss_per_char": 0.9521756829074812, "correct_loss_per_token": 2.5718182155064175, "incorrect_loss_per_token": 4.308783933851454, "correct_loss_uncond": -8.375469207763672, "incorrect_loss_uncond": -5.060618718465169}, "model_output": [{"sum_logits": -32.215126037597656, "num_tokens": 6, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -37.59870910644531, "logits_per_token": -5.369187672932942, "logits_per_char": -1.1505402156284876, "num_chars": 28}, {"sum_logits": -14.192827224731445, "num_tokens": 5, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -26.184051513671875, "logits_per_token": -2.838565444946289, "logits_per_char": -0.6170794445535411, "num_chars": 23}, {"sum_logits": -18.002727508544922, "num_tokens": 7, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -26.378196716308594, "logits_per_token": -2.5718182155064175, "logits_per_char": -0.6429545538766044, "num_chars": 28}, {"sum_logits": -14.15579605102539, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -4.71859868367513, "logits_per_char": -1.0889073885404146, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 307, "native_id": "9-621", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 22.61821174621582, "incorrect_loss_raw": 22.553002675374348, "correct_loss_per_char": 0.8377115461561415, "incorrect_loss_per_char": 0.9203000261326029, "correct_loss_per_token": 5.654552936553955, "incorrect_loss_per_token": 5.22607479095459, "correct_loss_uncond": -15.336179733276367, "incorrect_loss_uncond": -8.298332850138346}, "model_output": [{"sum_logits": -19.437625885009766, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.67754364013672, "logits_per_token": -4.859406471252441, "logits_per_char": -0.8099010785420736, "num_chars": 24}, {"sum_logits": -22.61821174621582, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -37.95439147949219, "logits_per_token": -5.654552936553955, "logits_per_char": -0.8377115461561415, "num_chars": 27}, {"sum_logits": -24.730552673339844, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -37.110774993896484, "logits_per_token": -4.946110534667969, "logits_per_char": -0.8832340240478516, "num_chars": 28}, {"sum_logits": -23.490829467773438, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.765687942504883, "logits_per_token": -5.872707366943359, "logits_per_char": -1.0677649758078835, "num_chars": 22}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 308, "native_id": "1823", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.601245880126953, "incorrect_loss_raw": 22.531139691670734, "correct_loss_per_char": 1.0316445200066817, "incorrect_loss_per_char": 1.3253611583335727, "correct_loss_per_token": 4.900311470031738, "incorrect_loss_per_token": 6.0712804264492455, "correct_loss_uncond": -11.274171829223633, "incorrect_loss_uncond": -5.903948148091634}, "model_output": [{"sum_logits": -22.92346954345703, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.178056716918945, "logits_per_token": -5.730867385864258, "logits_per_char": -1.348439384909237, "num_chars": 17}, {"sum_logits": -28.884111404418945, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.324127197265625, "logits_per_token": -7.221027851104736, "logits_per_char": -1.6990653767305262, "num_chars": 17}, {"sum_logits": -19.601245880126953, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.875417709350586, "logits_per_token": -4.900311470031738, "logits_per_char": -1.0316445200066817, "num_chars": 19}, {"sum_logits": -15.78583812713623, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -24.80307960510254, "logits_per_token": -5.261946042378743, "logits_per_char": -0.9285787133609548, "num_chars": 17}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 309, "native_id": "9-735", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.694730758666992, "incorrect_loss_raw": 15.515630404154459, "correct_loss_per_char": 0.5072122503209997, "incorrect_loss_per_char": 0.8498419859091934, "correct_loss_per_token": 3.423682689666748, "incorrect_loss_per_token": 3.8789076010386148, "correct_loss_uncond": -12.627689361572266, "incorrect_loss_uncond": -6.0944013595581055}, "model_output": [{"sum_logits": -12.833487510681152, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -19.535974502563477, "logits_per_token": -3.208371877670288, "logits_per_char": -0.6754467110884818, "num_chars": 19}, {"sum_logits": -13.694730758666992, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -26.322420120239258, "logits_per_token": -3.423682689666748, "logits_per_char": -0.5072122503209997, "num_chars": 27}, {"sum_logits": -17.613536834716797, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -23.696258544921875, "logits_per_token": -4.403384208679199, "logits_per_char": -0.9270282544587788, "num_chars": 19}, {"sum_logits": -16.09986686706543, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -21.597862243652344, "logits_per_token": -4.024966716766357, "logits_per_char": -0.9470509921803194, "num_chars": 17}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 310, "native_id": "7-1170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 30.96535873413086, "incorrect_loss_raw": 25.593220392862957, "correct_loss_per_char": 0.9383442040645715, "incorrect_loss_per_char": 0.8526646366889082, "correct_loss_per_token": 6.193071746826172, "incorrect_loss_per_token": 5.118644078572592, "correct_loss_uncond": -5.566978454589844, "incorrect_loss_uncond": -7.405572255452474}, "model_output": [{"sum_logits": -23.183380126953125, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -32.56462478637695, "logits_per_token": -4.636676025390625, "logits_per_char": -0.7994269009294181, "num_chars": 29}, {"sum_logits": -30.96535873413086, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -36.5323371887207, "logits_per_token": -6.193071746826172, "logits_per_char": -0.9383442040645715, "num_chars": 33}, {"sum_logits": -24.844009399414062, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -35.29362487792969, "logits_per_token": -4.968801879882813, "logits_per_char": -0.8872860499790737, "num_chars": 28}, {"sum_logits": -28.75227165222168, "num_tokens": 5, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -31.13812828063965, "logits_per_token": -5.750454330444336, "logits_per_char": -0.8712809591582327, "num_chars": 33}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 311, "native_id": "1500", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.333551406860352, "incorrect_loss_raw": 10.572618166605631, "correct_loss_per_char": 2.333387851715088, "incorrect_loss_per_char": 2.7136347081926133, "correct_loss_per_token": 3.1111838022867837, "incorrect_loss_per_token": 9.198758920033773, "correct_loss_uncond": -0.8118419647216797, "incorrect_loss_uncond": -0.21565818786621094}, "model_output": [{"sum_logits": -9.333551406860352, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -10.145393371582031, "logits_per_token": -3.1111838022867837, "logits_per_char": -2.333387851715088, "num_chars": 4}, {"sum_logits": -8.243155479431152, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.568012237548828, "logits_per_token": -4.121577739715576, "logits_per_char": -2.060788869857788, "num_chars": 4}, {"sum_logits": -13.085883140563965, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.181233406066895, "logits_per_token": -13.085883140563965, "logits_per_char": -2.617176628112793, "num_chars": 5}, {"sum_logits": -10.388815879821777, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -9.615583419799805, "logits_per_token": -10.388815879821777, "logits_per_char": -3.4629386266072593, "num_chars": 3}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 312, "native_id": "342", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.442501068115234, "incorrect_loss_raw": 15.251546859741211, "correct_loss_per_char": 0.8023611704508463, "incorrect_loss_per_char": 1.062594933795114, "correct_loss_per_token": 7.221250534057617, "incorrect_loss_per_token": 6.693083127339681, "correct_loss_uncond": -4.679353713989258, "incorrect_loss_uncond": -3.850835164388021}, "model_output": [{"sum_logits": -13.403854370117188, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -17.91304588317871, "logits_per_token": -6.701927185058594, "logits_per_char": -1.0310657207782452, "num_chars": 13}, {"sum_logits": -14.442501068115234, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.121854782104492, "logits_per_token": -7.221250534057617, "logits_per_char": -0.8023611704508463, "num_chars": 18}, {"sum_logits": -15.562360763549805, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.571372985839844, "logits_per_token": -7.781180381774902, "logits_per_char": -1.037490717569987, "num_chars": 15}, {"sum_logits": -16.78842544555664, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.82272720336914, "logits_per_token": -5.596141815185547, "logits_per_char": -1.1192283630371094, "num_chars": 15}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 313, "native_id": "7-356", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.73680305480957, "incorrect_loss_raw": 18.612762769063313, "correct_loss_per_char": 0.4598317397268195, "incorrect_loss_per_char": 1.2636408192188602, "correct_loss_per_token": 2.9122676849365234, "incorrect_loss_per_token": 6.204254256354439, "correct_loss_uncond": -15.466766357421875, "incorrect_loss_uncond": -3.555236498514811}, "model_output": [{"sum_logits": -23.841787338256836, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -23.201486587524414, "logits_per_token": -7.947262446085612, "logits_per_char": -1.833983641404372, "num_chars": 13}, {"sum_logits": -22.748035430908203, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -21.433963775634766, "logits_per_token": -7.582678476969401, "logits_per_char": -1.5165356953938802, "num_chars": 15}, {"sum_logits": -9.248465538024902, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -21.868547439575195, "logits_per_token": -3.082821846008301, "logits_per_char": -0.44040312085832867, "num_chars": 21}, {"sum_logits": -8.73680305480957, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.203569412231445, "logits_per_token": -2.9122676849365234, "logits_per_char": -0.4598317397268195, "num_chars": 19}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 314, "native_id": "78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.767674446105957, "incorrect_loss_raw": 6.716416517893474, "correct_loss_per_char": 0.823953492300851, "incorrect_loss_per_char": 0.8370101555945381, "correct_loss_per_token": 1.9225581487019856, "incorrect_loss_per_token": 3.97169828414917, "correct_loss_uncond": -10.811728477478027, "incorrect_loss_uncond": -7.937142848968506}, "model_output": [{"sum_logits": -3.6809401512145996, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.715497970581055, "logits_per_token": -3.6809401512145996, "logits_per_char": -0.46011751890182495, "num_chars": 8}, {"sum_logits": -5.767674446105957, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.579402923583984, "logits_per_token": -1.9225581487019856, "logits_per_char": -0.823953492300851, "num_chars": 7}, {"sum_logits": -9.503634452819824, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -14.203113555908203, "logits_per_token": -4.751817226409912, "logits_per_char": -1.0559593836466472, "num_chars": 9}, {"sum_logits": -6.964674949645996, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -16.04206657409668, "logits_per_token": -3.482337474822998, "logits_per_char": -0.9949535642351423, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 315, "native_id": "9-520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 43.85365295410156, "incorrect_loss_raw": 19.731059710184734, "correct_loss_per_char": 0.9533402816109036, "incorrect_loss_per_char": 1.0718051123028922, "correct_loss_per_token": 6.264807564871652, "incorrect_loss_per_token": 5.083064863416884, "correct_loss_uncond": -9.571697235107422, "incorrect_loss_uncond": -2.125171979268392}, "model_output": [{"sum_logits": -15.518736839294434, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -5.1729122797648115, "logits_per_char": -1.1937489876380334, "num_chars": 13}, {"sum_logits": -16.84656524658203, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -23.048580169677734, "logits_per_token": -3.369313049316406, "logits_per_char": -0.8022173926943824, "num_chars": 21}, {"sum_logits": -43.85365295410156, "num_tokens": 7, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -53.425350189208984, "logits_per_token": -6.264807564871652, "logits_per_char": -0.9533402816109036, "num_chars": 46}, {"sum_logits": -26.827877044677734, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -30.557270050048828, "logits_per_token": -6.706969261169434, "logits_per_char": -1.2194489565762607, "num_chars": 22}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 316, "native_id": "7-653", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.530929565429688, "incorrect_loss_raw": 18.61416753133138, "correct_loss_per_char": 0.5008837018694197, "incorrect_loss_per_char": 0.7902261942504388, "correct_loss_per_token": 2.504418509347098, "incorrect_loss_per_token": 4.653541882832845, "correct_loss_uncond": -17.949016571044922, "incorrect_loss_uncond": -10.058273951212565}, "model_output": [{"sum_logits": -22.741329193115234, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -35.534542083740234, "logits_per_token": -5.685332298278809, "logits_per_char": -0.8746665074275091, "num_chars": 26}, {"sum_logits": -13.613906860351562, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -27.129850387573242, "logits_per_token": -3.4034767150878906, "logits_per_char": -0.7165214137027138, "num_chars": 19}, {"sum_logits": -17.530929565429688, "num_tokens": 7, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -35.47994613647461, "logits_per_token": -2.504418509347098, "logits_per_char": -0.5008837018694197, "num_chars": 35}, {"sum_logits": -19.487266540527344, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -23.35293197631836, "logits_per_token": -4.871816635131836, "logits_per_char": -0.7794906616210937, "num_chars": 25}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 317, "native_id": "1112", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.568729877471924, "incorrect_loss_raw": 5.531404813130696, "correct_loss_per_char": 1.642182469367981, "incorrect_loss_per_char": 0.7999975721041362, "correct_loss_per_token": 6.568729877471924, "incorrect_loss_per_token": 5.531404813130696, "correct_loss_uncond": -6.703943729400635, "incorrect_loss_uncond": -7.873161633809407}, "model_output": [{"sum_logits": -5.290283679962158, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.888144493103027, "logits_per_token": -5.290283679962158, "logits_per_char": -0.6612854599952698, "num_chars": 8}, {"sum_logits": -6.568729877471924, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.272673606872559, "logits_per_token": -6.568729877471924, "logits_per_char": -1.642182469367981, "num_chars": 4}, {"sum_logits": -5.220788955688477, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.024401664733887, "logits_per_token": -5.220788955688477, "logits_per_char": -0.5220788955688477, "num_chars": 10}, {"sum_logits": -6.083141803741455, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.301153182983398, "logits_per_token": -6.083141803741455, "logits_per_char": -1.216628360748291, "num_chars": 5}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 318, "native_id": "9-152", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.631603240966797, "incorrect_loss_raw": 5.614584128061931, "correct_loss_per_char": 0.6052672068277994, "incorrect_loss_per_char": 1.1568655808766684, "correct_loss_per_token": 3.631603240966797, "incorrect_loss_per_token": 5.614584128061931, "correct_loss_uncond": -8.668170928955078, "incorrect_loss_uncond": -6.423292318979899}, "model_output": [{"sum_logits": -5.71766471862793, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.432230949401855, "logits_per_token": -5.71766471862793, "logits_per_char": -1.4294161796569824, "num_chars": 4}, {"sum_logits": -3.631603240966797, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.299774169921875, "logits_per_token": -3.631603240966797, "logits_per_char": -0.6052672068277994, "num_chars": 6}, {"sum_logits": -5.604978561401367, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.551660537719727, "logits_per_token": -5.604978561401367, "logits_per_char": -1.1209957122802734, "num_chars": 5}, {"sum_logits": -5.521109104156494, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -11.129737854003906, "logits_per_token": -5.521109104156494, "logits_per_char": -0.920184850692749, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 319, "native_id": "9-552", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.5359296798706055, "incorrect_loss_raw": 6.457563479741414, "correct_loss_per_char": 0.3928810755411784, "incorrect_loss_per_char": 0.9785387209483556, "correct_loss_per_token": 3.5359296798706055, "incorrect_loss_per_token": 3.994326194127401, "correct_loss_uncond": -11.377290725708008, "incorrect_loss_uncond": -8.842913230260214}, "model_output": [{"sum_logits": -5.914634704589844, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -14.31518840789795, "logits_per_token": -5.914634704589844, "logits_per_char": -1.1829269409179688, "num_chars": 5}, {"sum_logits": -3.5359296798706055, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -14.913220405578613, "logits_per_token": -3.5359296798706055, "logits_per_char": -0.3928810755411784, "num_chars": 9}, {"sum_logits": -3.964103937149048, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -16.00395965576172, "logits_per_token": -1.3213679790496826, "logits_per_char": -0.3964103937149048, "num_chars": 10}, {"sum_logits": -9.493951797485352, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -15.582282066345215, "logits_per_token": -4.746975898742676, "logits_per_char": -1.3562788282121931, "num_chars": 7}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 320, "native_id": "7-262", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.657225131988525, "incorrect_loss_raw": 7.54978593190511, "correct_loss_per_char": 0.6961113756353204, "incorrect_loss_per_char": 0.6457297304491977, "correct_loss_per_token": 2.5524083773295083, "incorrect_loss_per_token": 3.2140853669908314, "correct_loss_uncond": -8.227921962738037, "incorrect_loss_uncond": -7.997787793477376}, "model_output": [{"sum_logits": -7.657225131988525, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.885147094726562, "logits_per_token": -2.5524083773295083, "logits_per_char": -0.6961113756353204, "num_chars": 11}, {"sum_logits": -8.259232521057129, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.435548782348633, "logits_per_token": -4.1296162605285645, "logits_per_char": -0.5899451800755092, "num_chars": 14}, {"sum_logits": -4.295588493347168, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.88058853149414, "logits_per_token": -2.147794246673584, "logits_per_char": -0.4295588493347168, "num_chars": 10}, {"sum_logits": -10.094536781311035, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.326583862304688, "logits_per_token": -3.364845593770345, "logits_per_char": -0.9176851619373668, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 321, "native_id": "7-683", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.248560905456543, "incorrect_loss_raw": 11.794529914855957, "correct_loss_per_char": 0.45303505659103394, "incorrect_loss_per_char": 0.7724003235499065, "correct_loss_per_token": 2.416186968485514, "incorrect_loss_per_token": 3.931509971618653, "correct_loss_uncond": -13.712830543518066, "incorrect_loss_uncond": -9.865391731262207}, "model_output": [{"sum_logits": -15.105782508850098, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -24.183330535888672, "logits_per_token": -5.035260836283366, "logits_per_char": -1.0070521672566732, "num_chars": 15}, {"sum_logits": -10.00920295715332, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -22.210372924804688, "logits_per_token": -3.3364009857177734, "logits_per_char": -0.6255751848220825, "num_chars": 16}, {"sum_logits": -10.268604278564453, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -18.586061477661133, "logits_per_token": -3.422868092854818, "logits_per_char": -0.6845736185709635, "num_chars": 15}, {"sum_logits": -7.248560905456543, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -20.96139144897461, "logits_per_token": -2.416186968485514, "logits_per_char": -0.45303505659103394, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 322, "native_id": "276", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.47869873046875, "incorrect_loss_raw": 13.825282414754232, "correct_loss_per_char": 0.8829768254206731, "incorrect_loss_per_char": 1.2347423235575359, "correct_loss_per_token": 5.739349365234375, "incorrect_loss_per_token": 9.376360575358072, "correct_loss_uncond": -7.686351776123047, "incorrect_loss_uncond": -2.3383121490478516}, "model_output": [{"sum_logits": -16.93071937561035, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.632736206054688, "logits_per_token": -8.465359687805176, "logits_per_char": -1.4108932813008626, "num_chars": 12}, {"sum_logits": -11.47869873046875, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.165050506591797, "logits_per_token": -5.739349365234375, "logits_per_char": -0.8829768254206731, "num_chars": 13}, {"sum_logits": -9.762811660766602, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.031314849853516, "logits_per_token": -4.881405830383301, "logits_per_char": -0.6508541107177734, "num_chars": 15}, {"sum_logits": -14.782316207885742, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.826732635498047, "logits_per_token": -14.782316207885742, "logits_per_char": -1.6424795786539714, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 323, "native_id": "7-855", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.995480060577393, "incorrect_loss_raw": 9.612900733947754, "correct_loss_per_char": 0.4541345509615811, "incorrect_loss_per_char": 0.7990116973023315, "correct_loss_per_token": 2.4977400302886963, "incorrect_loss_per_token": 4.286796675788032, "correct_loss_uncond": -12.01102590560913, "incorrect_loss_uncond": -7.374248186747233}, "model_output": [{"sum_logits": -9.353766441345215, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -21.007564544677734, "logits_per_token": -3.1179221471150718, "logits_per_char": -0.7195204954880935, "num_chars": 13}, {"sum_logits": -12.38730239868164, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.80116081237793, "logits_per_token": -6.19365119934082, "logits_per_char": -1.0322751998901367, "num_chars": 12}, {"sum_logits": -4.995480060577393, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.006505966186523, "logits_per_token": -2.4977400302886963, "logits_per_char": -0.4541345509615811, "num_chars": 11}, {"sum_logits": -7.097633361816406, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.152721405029297, "logits_per_token": -3.548816680908203, "logits_per_char": -0.6452393965287642, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 324, "native_id": "664", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.434322357177734, "incorrect_loss_raw": 18.776101112365723, "correct_loss_per_char": 0.9013200906606821, "incorrect_loss_per_char": 0.7054758348326752, "correct_loss_per_token": 4.6868644714355465, "incorrect_loss_per_token": 3.274333490644183, "correct_loss_uncond": -12.426944732666016, "incorrect_loss_uncond": -14.566132545471191}, "model_output": [{"sum_logits": -18.995014190673828, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -35.951358795166016, "logits_per_token": -3.799002838134766, "logits_per_char": -0.82587018220321, "num_chars": 23}, {"sum_logits": -25.246553421020508, "num_tokens": 7, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -40.32361602783203, "logits_per_token": -3.6066504887172153, "logits_per_char": -0.7650470733642578, "num_chars": 33}, {"sum_logits": -12.086735725402832, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -23.751726150512695, "logits_per_token": -2.4173471450805666, "logits_per_char": -0.5255102489305579, "num_chars": 23}, {"sum_logits": -23.434322357177734, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -35.86126708984375, "logits_per_token": -4.6868644714355465, "logits_per_char": -0.9013200906606821, "num_chars": 26}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 325, "native_id": "9-883", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.22184944152832, "incorrect_loss_raw": 9.552175998687744, "correct_loss_per_char": 0.802427715725369, "incorrect_loss_per_char": 0.9316514608187553, "correct_loss_per_token": 3.61092472076416, "incorrect_loss_per_token": 3.5059370199839273, "correct_loss_uncond": -10.822351455688477, "incorrect_loss_uncond": -7.184758345286052}, "model_output": [{"sum_logits": -11.350619316101074, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.00078582763672, "logits_per_token": -3.7835397720336914, "logits_per_char": -0.8731245627770057, "num_chars": 13}, {"sum_logits": -11.51209831237793, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.229541778564453, "logits_per_token": -3.8373661041259766, "logits_per_char": -1.4390122890472412, "num_chars": 8}, {"sum_logits": -7.22184944152832, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.044200897216797, "logits_per_token": -3.61092472076416, "logits_per_char": -0.802427715725369, "num_chars": 9}, {"sum_logits": -5.7938103675842285, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.980475425720215, "logits_per_token": -2.8969051837921143, "logits_per_char": -0.48281753063201904, "num_chars": 12}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 326, "native_id": "9-550", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.957582473754883, "incorrect_loss_raw": 11.005668958028158, "correct_loss_per_char": 0.569408689226423, "incorrect_loss_per_char": 0.4211040293308373, "correct_loss_per_token": 2.9893956184387207, "incorrect_loss_per_token": 2.3238364537556966, "correct_loss_uncond": -10.015031814575195, "incorrect_loss_uncond": -13.696820259094238}, "model_output": [{"sum_logits": -11.957582473754883, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -21.972614288330078, "logits_per_token": -2.9893956184387207, "logits_per_char": -0.569408689226423, "num_chars": 21}, {"sum_logits": -7.362159729003906, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -20.414518356323242, "logits_per_token": -1.8405399322509766, "logits_per_char": -0.3505790347144717, "num_chars": 21}, {"sum_logits": -10.110547065734863, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.17745018005371, "logits_per_token": -2.0221094131469726, "logits_per_char": -0.33701823552449545, "num_chars": 30}, {"sum_logits": -15.544300079345703, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -26.515499114990234, "logits_per_token": -3.1088600158691406, "logits_per_char": -0.5757148177535446, "num_chars": 27}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 327, "native_id": "8-493", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.059566497802734, "incorrect_loss_raw": 10.219326655069986, "correct_loss_per_char": 1.2387980292825138, "incorrect_loss_per_char": 1.0462081085551869, "correct_loss_per_token": 7.019855499267578, "incorrect_loss_per_token": 4.194141785303752, "correct_loss_uncond": -9.532123565673828, "incorrect_loss_uncond": -10.624828974405924}, "model_output": [{"sum_logits": -12.763665199279785, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -21.942785263061523, "logits_per_token": -6.381832599639893, "logits_per_char": -1.2763665199279786, "num_chars": 10}, {"sum_logits": -10.986258506774902, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -22.4439754486084, "logits_per_token": -2.7465646266937256, "logits_per_char": -0.998750773343173, "num_chars": 11}, {"sum_logits": -6.908056259155273, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -18.145706176757812, "logits_per_token": -3.4540281295776367, "logits_per_char": -0.8635070323944092, "num_chars": 8}, {"sum_logits": -21.059566497802734, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -30.591690063476562, "logits_per_token": -7.019855499267578, "logits_per_char": -1.2387980292825138, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 328, "native_id": "9-257", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.18812370300293, "incorrect_loss_raw": 17.331693649291992, "correct_loss_per_char": 1.4653470781114366, "incorrect_loss_per_char": 1.840743160660649, "correct_loss_per_token": 6.594061851501465, "incorrect_loss_per_token": 10.796010653177897, "correct_loss_uncond": -4.99098014831543, "incorrect_loss_uncond": 0.4516003926595052}, "model_output": [{"sum_logits": -24.75385284423828, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -22.78537368774414, "logits_per_token": -12.37692642211914, "logits_per_char": -2.2503502585671167, "num_chars": 11}, {"sum_logits": -12.780982971191406, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.66164779663086, "logits_per_token": -12.780982971191406, "logits_per_char": -1.825854710170201, "num_chars": 7}, {"sum_logits": -13.18812370300293, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.17910385131836, "logits_per_token": -6.594061851501465, "logits_per_char": -1.4653470781114366, "num_chars": 9}, {"sum_logits": -14.460245132446289, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.19325828552246, "logits_per_token": -7.2301225662231445, "logits_per_char": -1.446024513244629, "num_chars": 10}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 329, "native_id": "1239", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 36.19567108154297, "incorrect_loss_raw": 29.10838445027669, "correct_loss_per_char": 0.7868624148161515, "incorrect_loss_per_char": 0.6444578881367254, "correct_loss_per_token": 4.524458885192871, "incorrect_loss_per_token": 3.634271558125814, "correct_loss_uncond": -19.352184295654297, "incorrect_loss_uncond": -14.764415740966797}, "model_output": [{"sum_logits": -33.36766815185547, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -43.96269226074219, "logits_per_token": -4.170958518981934, "logits_per_char": -0.8138455646794017, "num_chars": 41}, {"sum_logits": -33.915870666503906, "num_tokens": 10, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -49.623046875, "logits_per_token": -3.3915870666503904, "logits_per_char": -0.6056405476161412, "num_chars": 56}, {"sum_logits": -20.041614532470703, "num_tokens": 6, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -38.03266143798828, "logits_per_token": -3.340269088745117, "logits_per_char": -0.5138875521146334, "num_chars": 39}, {"sum_logits": -36.19567108154297, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -55.547855377197266, "logits_per_token": -4.524458885192871, "logits_per_char": -0.7868624148161515, "num_chars": 46}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 330, "native_id": "869", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.03070068359375, "incorrect_loss_raw": 10.183476448059082, "correct_loss_per_char": 2.0051167805989585, "incorrect_loss_per_char": 1.4930882453918457, "correct_loss_per_token": 6.015350341796875, "incorrect_loss_per_token": 6.689579804738362, "correct_loss_uncond": -5.070594787597656, "incorrect_loss_uncond": -3.8252182006835938}, "model_output": [{"sum_logits": -12.03070068359375, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -17.101295471191406, "logits_per_token": -6.015350341796875, "logits_per_char": -2.0051167805989585, "num_chars": 6}, {"sum_logits": -9.58704948425293, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.321036338806152, "logits_per_token": -9.58704948425293, "logits_per_char": -1.5978415807088215, "num_chars": 6}, {"sum_logits": -9.187102317810059, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.28221321105957, "logits_per_token": -4.593551158905029, "logits_per_char": -0.9187102317810059, "num_chars": 10}, {"sum_logits": -11.776277542114258, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.422834396362305, "logits_per_token": -5.888138771057129, "logits_per_char": -1.9627129236857097, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 331, "native_id": "7-1105", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.41754150390625, "incorrect_loss_raw": 21.73434289296468, "correct_loss_per_char": 0.7472513834635417, "incorrect_loss_per_char": 0.8105032729411471, "correct_loss_per_token": 4.48350830078125, "incorrect_loss_per_token": 4.569657182693482, "correct_loss_uncond": -11.72079086303711, "incorrect_loss_uncond": -8.206993420918783}, "model_output": [{"sum_logits": -22.41754150390625, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -34.13833236694336, "logits_per_token": -4.48350830078125, "logits_per_char": -0.7472513834635417, "num_chars": 30}, {"sum_logits": -25.497867584228516, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -32.63768005371094, "logits_per_token": -5.0995735168457035, "logits_per_char": -0.7968083620071411, "num_chars": 32}, {"sum_logits": -26.337844848632812, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -28.286937713623047, "logits_per_token": -5.267568969726563, "logits_per_char": -1.0535137939453125, "num_chars": 25}, {"sum_logits": -13.367316246032715, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -28.899391174316406, "logits_per_token": -3.3418290615081787, "logits_per_char": -0.5811876628709876, "num_chars": 23}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 332, "native_id": "597", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.015296936035156, "incorrect_loss_raw": 21.041579882303875, "correct_loss_per_char": 0.5352394636287245, "incorrect_loss_per_char": 0.6415436368335709, "correct_loss_per_token": 3.835882822672526, "incorrect_loss_per_token": 2.9905356831020775, "correct_loss_uncond": -14.760902404785156, "incorrect_loss_uncond": -8.520634651184082}, "model_output": [{"sum_logits": -9.342719078063965, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -21.14439582824707, "logits_per_token": -1.5571198463439941, "logits_per_char": -0.3221627268297919, "num_chars": 29}, {"sum_logits": -16.185455322265625, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -25.419017791748047, "logits_per_token": -3.237091064453125, "logits_per_char": -0.5581191490436422, "num_chars": 29}, {"sum_logits": -37.59656524658203, "num_tokens": 9, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -42.12322998046875, "logits_per_token": -4.177396138509114, "logits_per_char": -1.0443490346272786, "num_chars": 36}, {"sum_logits": -23.015296936035156, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -37.77619934082031, "logits_per_token": -3.835882822672526, "logits_per_char": -0.5352394636287245, "num_chars": 43}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 333, "native_id": "385", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 29.389503479003906, "incorrect_loss_raw": 31.690353393554688, "correct_loss_per_char": 0.8643971611471737, "incorrect_loss_per_char": 0.9238922935503848, "correct_loss_per_token": 4.198500497000558, "incorrect_loss_per_token": 4.512173461914062, "correct_loss_uncond": -5.158031463623047, "incorrect_loss_uncond": -5.217339833577474}, "model_output": [{"sum_logits": -30.214977264404297, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -33.35947036743164, "logits_per_token": -4.316425323486328, "logits_per_char": -0.9746766859485257, "num_chars": 31}, {"sum_logits": -37.51121520996094, "num_tokens": 10, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -42.86949157714844, "logits_per_token": -3.7511215209960938, "logits_per_char": -0.9149076880478277, "num_chars": 41}, {"sum_logits": -27.344867706298828, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -34.494117736816406, "logits_per_token": -5.468973541259766, "logits_per_char": -0.8820925066548009, "num_chars": 31}, {"sum_logits": -29.389503479003906, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -34.54753494262695, "logits_per_token": -4.198500497000558, "logits_per_char": -0.8643971611471737, "num_chars": 34}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 334, "native_id": "1301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.04737377166748, "incorrect_loss_raw": 6.28341817855835, "correct_loss_per_char": 1.2770339792424983, "incorrect_loss_per_char": 0.6634063285207908, "correct_loss_per_token": 7.02368688583374, "incorrect_loss_per_token": 2.437126080195109, "correct_loss_uncond": -2.12526798248291, "incorrect_loss_uncond": -8.45650021235148}, "model_output": [{"sum_logits": -4.791906833648682, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.177331924438477, "logits_per_token": -2.395953416824341, "logits_per_char": -0.5989883542060852, "num_chars": 8}, {"sum_logits": -14.04737377166748, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.17264175415039, "logits_per_token": -7.02368688583374, "logits_per_char": -1.2770339792424983, "num_chars": 11}, {"sum_logits": -5.603351593017578, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.939334869384766, "logits_per_token": -2.801675796508789, "logits_per_char": -0.6225946214463975, "num_chars": 9}, {"sum_logits": -8.454996109008789, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.10308837890625, "logits_per_token": -2.1137490272521973, "logits_per_char": -0.7686360099098899, "num_chars": 11}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 335, "native_id": "9-893", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.718194961547852, "incorrect_loss_raw": 9.312690099080404, "correct_loss_per_char": 0.7655853543962751, "incorrect_loss_per_char": 0.7453004483020668, "correct_loss_per_token": 5.359097480773926, "incorrect_loss_per_token": 7.240248680114746, "correct_loss_uncond": -6.285917282104492, "incorrect_loss_uncond": -6.602897644042969}, "model_output": [{"sum_logits": -5.426721572875977, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -13.012849807739258, "logits_per_token": -5.426721572875977, "logits_per_char": -0.5426721572875977, "num_chars": 10}, {"sum_logits": -12.434648513793945, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.86631965637207, "logits_per_token": -6.217324256896973, "logits_per_char": -0.7771655321121216, "num_chars": 16}, {"sum_logits": -10.076700210571289, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -14.867593765258789, "logits_per_token": -10.076700210571289, "logits_per_char": -0.9160636555064808, "num_chars": 11}, {"sum_logits": -10.718194961547852, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -17.004112243652344, "logits_per_token": -5.359097480773926, "logits_per_char": -0.7655853543962751, "num_chars": 14}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 336, "native_id": "9-369", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.331517219543457, "incorrect_loss_raw": 13.984081904093424, "correct_loss_per_char": 0.4073065121968587, "incorrect_loss_per_char": 0.850184342616184, "correct_loss_per_token": 3.6657586097717285, "incorrect_loss_per_token": 5.34600133366055, "correct_loss_uncond": -10.794075965881348, "incorrect_loss_uncond": -6.7339324951171875}, "model_output": [{"sum_logits": -11.799201011657715, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -20.057907104492188, "logits_per_token": -2.9498002529144287, "logits_per_char": -0.6210105795609323, "num_chars": 19}, {"sum_logits": -7.331517219543457, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -18.125593185424805, "logits_per_token": -3.6657586097717285, "logits_per_char": -0.4073065121968587, "num_chars": 18}, {"sum_logits": -11.929911613464355, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -22.301095962524414, "logits_per_token": -3.9766372044881186, "logits_per_char": -0.6278900849191766, "num_chars": 19}, {"sum_logits": -18.223133087158203, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.795040130615234, "logits_per_token": -9.111566543579102, "logits_per_char": -1.3016523633684431, "num_chars": 14}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 337, "native_id": "9-1026", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.878836154937744, "incorrect_loss_raw": 11.55918550491333, "correct_loss_per_char": 0.8398337364196777, "incorrect_loss_per_char": 1.3723198727611858, "correct_loss_per_token": 5.878836154937744, "incorrect_loss_per_token": 8.836565812428793, "correct_loss_uncond": -8.402648448944092, "incorrect_loss_uncond": -3.74425999323527}, "model_output": [{"sum_logits": -5.878836154937744, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.281484603881836, "logits_per_token": -5.878836154937744, "logits_per_char": -0.8398337364196777, "num_chars": 7}, {"sum_logits": -10.496404647827148, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.774459838867188, "logits_per_token": -10.496404647827148, "logits_per_char": -0.9542186043479226, "num_chars": 11}, {"sum_logits": -16.335718154907227, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.737049102783203, "logits_per_token": -8.167859077453613, "logits_per_char": -2.0419647693634033, "num_chars": 8}, {"sum_logits": -7.845433712005615, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.39882755279541, "logits_per_token": -7.845433712005615, "logits_per_char": -1.1207762445722307, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 338, "native_id": "7-424", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.86996603012085, "incorrect_loss_raw": 10.342366854349772, "correct_loss_per_char": 0.41928328786577496, "incorrect_loss_per_char": 0.7723373347877439, "correct_loss_per_token": 1.9566553433736165, "incorrect_loss_per_token": 3.447455618116591, "correct_loss_uncond": -13.803675174713135, "incorrect_loss_uncond": -9.908428510030111}, "model_output": [{"sum_logits": -8.711856842041016, "num_tokens": 3, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -20.096010208129883, "logits_per_token": -2.9039522806803384, "logits_per_char": -0.5444910526275635, "num_chars": 16}, {"sum_logits": -5.86996603012085, "num_tokens": 3, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -19.673641204833984, "logits_per_token": -1.9566553433736165, "logits_per_char": -0.41928328786577496, "num_chars": 14}, {"sum_logits": -8.73034381866455, "num_tokens": 3, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -19.38872718811035, "logits_per_token": -2.910114606221517, "logits_per_char": -0.7275286515553793, "num_chars": 12}, {"sum_logits": -13.58489990234375, "num_tokens": 3, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -21.267648696899414, "logits_per_token": -4.528299967447917, "logits_per_char": -1.0449923001802885, "num_chars": 13}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 339, "native_id": "9-259", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.783342361450195, "incorrect_loss_raw": 20.25060526529948, "correct_loss_per_char": 2.0979177951812744, "incorrect_loss_per_char": 1.4080512850892308, "correct_loss_per_token": 5.5944474538167315, "incorrect_loss_per_token": 6.061015182071262, "correct_loss_uncond": 6.667724609375, "incorrect_loss_uncond": -3.1946983337402344}, "model_output": [{"sum_logits": -16.783342361450195, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -10.115617752075195, "logits_per_token": -5.5944474538167315, "logits_per_char": -2.0979177951812744, "num_chars": 8}, {"sum_logits": -24.81071662902832, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -28.56928062438965, "logits_per_token": -6.20267915725708, "logits_per_char": -1.459453919354607, "num_chars": 17}, {"sum_logits": -11.300346374511719, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.56608009338379, "logits_per_token": -3.7667821248372397, "logits_per_char": -0.8692574134239783, "num_chars": 13}, {"sum_logits": -24.6407527923584, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -25.200550079345703, "logits_per_token": -8.213584264119467, "logits_per_char": -1.8954425224891076, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 340, "native_id": "9-783", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.445138931274414, "incorrect_loss_raw": 8.24103577931722, "correct_loss_per_char": 1.7408564885457356, "incorrect_loss_per_char": 1.452995491027832, "correct_loss_per_token": 10.445138931274414, "incorrect_loss_per_token": 8.24103577931722, "correct_loss_uncond": -4.449108123779297, "incorrect_loss_uncond": -5.280112902323405}, "model_output": [{"sum_logits": -10.248612403869629, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.790234565734863, "logits_per_token": -10.248612403869629, "logits_per_char": -1.4640874862670898, "num_chars": 7}, {"sum_logits": -7.226047992706299, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.802062034606934, "logits_per_token": -7.226047992706299, "logits_per_char": -1.4452095985412599, "num_chars": 5}, {"sum_logits": -10.445138931274414, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.894247055053711, "logits_per_token": -10.445138931274414, "logits_per_char": -1.7408564885457356, "num_chars": 6}, {"sum_logits": -7.248446941375732, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.971149444580078, "logits_per_token": -7.248446941375732, "logits_per_char": -1.4496893882751465, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 341, "native_id": "1088", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.397584915161133, "incorrect_loss_raw": 6.023443857828776, "correct_loss_per_char": 1.3855417796543665, "incorrect_loss_per_char": 0.698085347811381, "correct_loss_per_token": 9.698792457580566, "incorrect_loss_per_token": 5.089365005493164, "correct_loss_uncond": -5.24189567565918, "incorrect_loss_uncond": -10.538147608439127}, "model_output": [{"sum_logits": -5.604473114013672, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -23.5694580078125, "logits_per_token": -2.802236557006836, "logits_per_char": -0.4003195081438337, "num_chars": 14}, {"sum_logits": -19.397584915161133, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -24.639480590820312, "logits_per_token": -9.698792457580566, "logits_per_char": -1.3855417796543665, "num_chars": 14}, {"sum_logits": -7.5994367599487305, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.184250831604004, "logits_per_token": -7.5994367599487305, "logits_per_char": -1.0856338228498186, "num_chars": 7}, {"sum_logits": -4.866421699523926, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.931065559387207, "logits_per_token": -4.866421699523926, "logits_per_char": -0.6083027124404907, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 342, "native_id": "1387", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 41.370540618896484, "incorrect_loss_raw": 32.42506663004557, "correct_loss_per_char": 0.8802242684871593, "incorrect_loss_per_char": 0.7432540188664976, "correct_loss_per_token": 4.596726735432942, "incorrect_loss_per_token": 3.3395222288189514, "correct_loss_uncond": -3.2675323486328125, "incorrect_loss_uncond": -7.643225351969401}, "model_output": [{"sum_logits": -34.713233947753906, "num_tokens": 11, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -37.91510772705078, "logits_per_token": -3.1557485407049004, "logits_per_char": -0.7231923739115397, "num_chars": 48}, {"sum_logits": -24.264862060546875, "num_tokens": 8, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -38.12493896484375, "logits_per_token": -3.0331077575683594, "logits_per_char": -0.6740239461263021, "num_chars": 36}, {"sum_logits": -41.370540618896484, "num_tokens": 9, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -44.6380729675293, "logits_per_token": -4.596726735432942, "logits_per_char": -0.8802242684871593, "num_chars": 47}, {"sum_logits": -38.29710388183594, "num_tokens": 10, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -44.16482925415039, "logits_per_token": -3.8297103881835937, "logits_per_char": -0.8325457365616508, "num_chars": 46}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 343, "native_id": "7-1062", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.94427490234375, "incorrect_loss_raw": 15.397737820943197, "correct_loss_per_char": 0.4987335205078125, "incorrect_loss_per_char": 0.6423574867993774, "correct_loss_per_token": 3.1348964146205356, "incorrect_loss_per_token": 3.5756875197092692, "correct_loss_uncond": -17.55646514892578, "incorrect_loss_uncond": -13.692399660746256}, "model_output": [{"sum_logits": -21.94427490234375, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -39.50074005126953, "logits_per_token": -3.1348964146205356, "logits_per_char": -0.4987335205078125, "num_chars": 44}, {"sum_logits": -10.026549339294434, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.301380157470703, "logits_per_token": -2.5066373348236084, "logits_per_char": -0.3856365130497859, "num_chars": 26}, {"sum_logits": -16.424816131591797, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -26.908475875854492, "logits_per_token": -3.2849632263183595, "logits_per_char": -0.7821341015043712, "num_chars": 21}, {"sum_logits": -19.74184799194336, "num_tokens": 4, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.060556411743164, "logits_per_token": -4.93546199798584, "logits_per_char": -0.7593018458439753, "num_chars": 26}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 344, "native_id": "676", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.87384033203125, "incorrect_loss_raw": 15.430704434712728, "correct_loss_per_char": 0.8582560221354166, "incorrect_loss_per_char": 1.1227600113958374, "correct_loss_per_token": 4.291280110677083, "incorrect_loss_per_token": 5.1435681449042425, "correct_loss_uncond": -10.073795318603516, "incorrect_loss_uncond": -6.645119667053223}, "model_output": [{"sum_logits": -11.106980323791504, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -17.925310134887695, "logits_per_token": -3.702326774597168, "logits_per_char": -0.8543831018301157, "num_chars": 13}, {"sum_logits": -18.783546447753906, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -26.10464096069336, "logits_per_token": -6.261182149251302, "logits_per_char": -1.2522364298502604, "num_chars": 15}, {"sum_logits": -16.401586532592773, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -22.197521209716797, "logits_per_token": -5.467195510864258, "logits_per_char": -1.2616605025071363, "num_chars": 13}, {"sum_logits": -12.87384033203125, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -22.947635650634766, "logits_per_token": -4.291280110677083, "logits_per_char": -0.8582560221354166, "num_chars": 15}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 345, "native_id": "1998", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.698287963867188, "incorrect_loss_raw": 6.64759635925293, "correct_loss_per_char": 0.6465525309244792, "incorrect_loss_per_char": 0.9467226966979011, "correct_loss_per_token": 4.849143981933594, "incorrect_loss_per_token": 6.64759635925293, "correct_loss_uncond": -9.66141128540039, "incorrect_loss_uncond": -6.603288014729817}, "model_output": [{"sum_logits": -2.973966121673584, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.941990852355957, "logits_per_token": -2.973966121673584, "logits_per_char": -0.4248523030962263, "num_chars": 7}, {"sum_logits": -9.698287963867188, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -19.359699249267578, "logits_per_token": -4.849143981933594, "logits_per_char": -0.6465525309244792, "num_chars": 15}, {"sum_logits": -7.061110019683838, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.37353515625, "logits_per_token": -7.061110019683838, "logits_per_char": -1.1768516699473064, "num_chars": 6}, {"sum_logits": -9.907712936401367, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.437127113342285, "logits_per_token": -9.907712936401367, "logits_per_char": -1.238464117050171, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 346, "native_id": "1698", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.718149185180664, "incorrect_loss_raw": 6.286049842834473, "correct_loss_per_char": 1.0422440697165096, "incorrect_loss_per_char": 1.1098450024922688, "correct_loss_per_token": 5.906049728393555, "incorrect_loss_per_token": 6.286049842834473, "correct_loss_uncond": -4.134370803833008, "incorrect_loss_uncond": -5.1234925587972}, "model_output": [{"sum_logits": -17.718149185180664, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -21.852519989013672, "logits_per_token": -5.906049728393555, "logits_per_char": -1.0422440697165096, "num_chars": 17}, {"sum_logits": -7.085649490356445, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -7.085649490356445, "logits_per_char": -1.1809415817260742, "num_chars": 6}, {"sum_logits": -6.177197456359863, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.399619102478027, "logits_per_token": -6.177197456359863, "logits_per_char": -1.0295329093933105, "num_chars": 6}, {"sum_logits": -5.595302581787109, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -11.94001579284668, "logits_per_token": -5.595302581787109, "logits_per_char": -1.1190605163574219, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 347, "native_id": "490", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.419283866882324, "incorrect_loss_raw": 16.435265858968098, "correct_loss_per_char": 1.118273655573527, "incorrect_loss_per_char": 1.115898689313492, "correct_loss_per_token": 6.709641933441162, "incorrect_loss_per_token": 8.217632929484049, "correct_loss_uncond": -2.6341753005981445, "incorrect_loss_uncond": -4.544016520182292}, "model_output": [{"sum_logits": -13.419283866882324, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.05345916748047, "logits_per_token": -6.709641933441162, "logits_per_char": -1.118273655573527, "num_chars": 12}, {"sum_logits": -12.293132781982422, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -18.087467193603516, "logits_per_token": -6.146566390991211, "logits_per_char": -0.9456255986140325, "num_chars": 13}, {"sum_logits": -16.183969497680664, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -21.203243255615234, "logits_per_token": -8.091984748840332, "logits_per_char": -1.2449207305908203, "num_chars": 13}, {"sum_logits": -20.82869529724121, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -23.647136688232422, "logits_per_token": -10.414347648620605, "logits_per_char": -1.1571497387356229, "num_chars": 18}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 348, "native_id": "844", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 16.051898956298828, "incorrect_loss_raw": 16.219640254974365, "correct_loss_per_char": 1.0701265970865885, "incorrect_loss_per_char": 1.115096240573459, "correct_loss_per_token": 8.025949478149414, "incorrect_loss_per_token": 6.732806046803792, "correct_loss_uncond": -9.637332916259766, "incorrect_loss_uncond": -5.403575738271077}, "model_output": [{"sum_logits": -16.051898956298828, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -25.689231872558594, "logits_per_token": -8.025949478149414, "logits_per_char": -1.0701265970865885, "num_chars": 15}, {"sum_logits": -19.27198028564453, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -27.78561782836914, "logits_per_token": -6.423993428548177, "logits_per_char": -1.2847986857096354, "num_chars": 15}, {"sum_logits": -23.418773651123047, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.931121826171875, "logits_per_token": -7.806257883707683, "logits_per_char": -1.4636733531951904, "num_chars": 16}, {"sum_logits": -5.968166828155518, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.152908325195312, "logits_per_token": -5.968166828155518, "logits_per_char": -0.5968166828155518, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 349, "native_id": "1795", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.594961166381836, "incorrect_loss_raw": 7.487931728363037, "correct_loss_per_char": 1.0743701457977295, "incorrect_loss_per_char": 1.2702496157752143, "correct_loss_per_token": 2.148740291595459, "incorrect_loss_per_token": 4.424844053056505, "correct_loss_uncond": -5.788521766662598, "incorrect_loss_uncond": -6.647334893544515}, "model_output": [{"sum_logits": -7.036871433258057, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.798407554626465, "logits_per_token": -2.345623811086019, "logits_per_char": -0.7036871433258056, "num_chars": 10}, {"sum_logits": -8.996030807495117, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.05756950378418, "logits_per_token": -4.498015403747559, "logits_per_char": -1.4993384679158528, "num_chars": 6}, {"sum_logits": -8.594961166381836, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.383482933044434, "logits_per_token": -2.148740291595459, "logits_per_char": -1.0743701457977295, "num_chars": 8}, {"sum_logits": -6.4308929443359375, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.549822807312012, "logits_per_token": -6.4308929443359375, "logits_per_char": -1.6077232360839844, "num_chars": 4}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 350, "native_id": "1508", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.683930397033691, "incorrect_loss_raw": 7.836892922719319, "correct_loss_per_char": 0.7426589330037435, "incorrect_loss_per_char": 0.7217368761698406, "correct_loss_per_token": 3.3419651985168457, "incorrect_loss_per_token": 5.5708088874816895, "correct_loss_uncond": -10.208250999450684, "incorrect_loss_uncond": -6.245558897654216}, "model_output": [{"sum_logits": -3.4680862426757812, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.46367073059082, "logits_per_token": -3.4680862426757812, "logits_per_char": -0.6936172485351563, "num_chars": 5}, {"sum_logits": -6.683930397033691, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -16.892181396484375, "logits_per_token": -3.3419651985168457, "logits_per_char": -0.7426589330037435, "num_chars": 9}, {"sum_logits": -13.596504211425781, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -16.522768020629883, "logits_per_token": -6.798252105712891, "logits_per_char": -0.75536134507921, "num_chars": 18}, {"sum_logits": -6.4460883140563965, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.260916709899902, "logits_per_token": -6.4460883140563965, "logits_per_char": -0.7162320348951552, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 351, "native_id": "9-289", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.805051803588867, "incorrect_loss_raw": 5.4385732014973955, "correct_loss_per_char": 1.9610103607177733, "incorrect_loss_per_char": 0.7714930534362793, "correct_loss_per_token": 9.805051803588867, "incorrect_loss_per_token": 5.4385732014973955, "correct_loss_uncond": -6.091375350952148, "incorrect_loss_uncond": -7.664685567220052}, "model_output": [{"sum_logits": -3.953803539276123, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -12.05653190612793, "logits_per_token": -3.953803539276123, "logits_per_char": -0.6589672565460205, "num_chars": 6}, {"sum_logits": -6.0721116065979, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.803878784179688, "logits_per_token": -6.0721116065979, "logits_per_char": -0.6072111606597901, "num_chars": 10}, {"sum_logits": -6.289804458618164, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.449365615844727, "logits_per_token": -6.289804458618164, "logits_per_char": -1.0483007431030273, "num_chars": 6}, {"sum_logits": -9.805051803588867, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.896427154541016, "logits_per_token": -9.805051803588867, "logits_per_char": -1.9610103607177733, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 352, "native_id": "9-668", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9778363704681396, "incorrect_loss_raw": 8.377026875813803, "correct_loss_per_char": 0.7955672740936279, "incorrect_loss_per_char": 1.6754053751627602, "correct_loss_per_token": 3.9778363704681396, "incorrect_loss_per_token": 8.377026875813803, "correct_loss_uncond": -7.891245126724243, "incorrect_loss_uncond": -5.026683807373047}, "model_output": [{"sum_logits": -8.996288299560547, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.269713401794434, "logits_per_token": -8.996288299560547, "logits_per_char": -1.7992576599121093, "num_chars": 5}, {"sum_logits": -7.230630874633789, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.12280559539795, "logits_per_token": -7.230630874633789, "logits_per_char": -1.4461261749267578, "num_chars": 5}, {"sum_logits": -3.9778363704681396, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -11.869081497192383, "logits_per_token": -3.9778363704681396, "logits_per_char": -0.7955672740936279, "num_chars": 5}, {"sum_logits": -8.90416145324707, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.818613052368164, "logits_per_token": -8.90416145324707, "logits_per_char": -1.780832290649414, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 353, "native_id": "7-364", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 27.092548370361328, "incorrect_loss_raw": 25.964197794596355, "correct_loss_per_char": 0.7968396579518038, "incorrect_loss_per_char": 1.0712948215180549, "correct_loss_per_token": 4.515424728393555, "incorrect_loss_per_token": 5.545344443548292, "correct_loss_uncond": -11.49344253540039, "incorrect_loss_uncond": -6.330435434977214}, "model_output": [{"sum_logits": -27.092548370361328, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -38.58599090576172, "logits_per_token": -4.515424728393555, "logits_per_char": -0.7968396579518038, "num_chars": 34}, {"sum_logits": -26.479740142822266, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -35.641082763671875, "logits_per_token": -3.782820020403181, "logits_per_char": -0.8274918794631958, "num_chars": 32}, {"sum_logits": -28.251678466796875, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -35.94561004638672, "logits_per_token": -7.062919616699219, "logits_per_char": -1.228333846382473, "num_chars": 23}, {"sum_logits": -23.161174774169922, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -25.29720687866211, "logits_per_token": -5.7902936935424805, "logits_per_char": -1.1580587387084962, "num_chars": 20}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 354, "native_id": "1271", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.6498117446899414, "incorrect_loss_raw": 3.7096428871154785, "correct_loss_per_char": 0.6083019574483236, "incorrect_loss_per_char": 0.5025451677816886, "correct_loss_per_token": 3.6498117446899414, "incorrect_loss_per_token": 3.7096428871154785, "correct_loss_uncond": -6.903359413146973, "incorrect_loss_uncond": -6.8009012540181475}, "model_output": [{"sum_logits": -3.5071358680725098, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.08270263671875, "logits_per_token": -3.5071358680725098, "logits_per_char": -0.7014271736145019, "num_chars": 5}, {"sum_logits": -3.6498117446899414, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.553171157836914, "logits_per_token": -3.6498117446899414, "logits_per_char": -0.6083019574483236, "num_chars": 6}, {"sum_logits": -3.6591782569885254, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -10.294792175292969, "logits_per_token": -3.6591782569885254, "logits_per_char": -0.36591782569885256, "num_chars": 10}, {"sum_logits": -3.9626145362854004, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.15413761138916, "logits_per_token": -3.9626145362854004, "logits_per_char": -0.44029050403171116, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 355, "native_id": "9-1117", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.122467041015625, "incorrect_loss_raw": 10.56358273824056, "correct_loss_per_char": 1.0136074490017362, "incorrect_loss_per_char": 1.4318942705790203, "correct_loss_per_token": 3.0408223470052085, "incorrect_loss_per_token": 7.045265515645345, "correct_loss_uncond": -2.841796875, "incorrect_loss_uncond": -2.594437599182129}, "model_output": [{"sum_logits": -9.122467041015625, "num_tokens": 3, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -11.964263916015625, "logits_per_token": -3.0408223470052085, "logits_per_char": -1.0136074490017362, "num_chars": 9}, {"sum_logits": -12.489034652709961, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -16.505966186523438, "logits_per_token": -6.2445173263549805, "logits_per_char": -1.248903465270996, "num_chars": 10}, {"sum_logits": -10.58084487915039, "num_tokens": 1, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -11.26740550994873, "logits_per_token": -10.58084487915039, "logits_per_char": -1.3226056098937988, "num_chars": 8}, {"sum_logits": -8.620868682861328, "num_tokens": 2, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -11.700689315795898, "logits_per_token": -4.310434341430664, "logits_per_char": -1.7241737365722656, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 356, "native_id": "35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.603633403778076, "incorrect_loss_raw": 7.933247884114583, "correct_loss_per_char": 0.9504541754722595, "incorrect_loss_per_char": 1.4439199924468993, "correct_loss_per_token": 7.603633403778076, "incorrect_loss_per_token": 7.933247884114583, "correct_loss_uncond": -3.0169968605041504, "incorrect_loss_uncond": -1.9166113535563152}, "model_output": [{"sum_logits": -7.603633403778076, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -10.620630264282227, "logits_per_token": -7.603633403778076, "logits_per_char": -0.9504541754722595, "num_chars": 8}, {"sum_logits": -8.009441375732422, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -10.634567260742188, "logits_per_token": -8.009441375732422, "logits_per_char": -1.6018882751464845, "num_chars": 5}, {"sum_logits": -7.672276496887207, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -8.620218276977539, "logits_per_token": -7.672276496887207, "logits_per_char": -1.9180691242218018, "num_chars": 4}, {"sum_logits": -8.118025779724121, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -10.294792175292969, "logits_per_token": -8.118025779724121, "logits_per_char": -0.8118025779724121, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 357, "native_id": "1660", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.583902359008789, "incorrect_loss_raw": 24.727738698323567, "correct_loss_per_char": 0.34723577045259024, "incorrect_loss_per_char": 0.5218888528165762, "correct_loss_per_token": 1.8229877948760986, "incorrect_loss_per_token": 2.9200884115816366, "correct_loss_uncond": -25.98649024963379, "incorrect_loss_uncond": -19.02947489420573}, "model_output": [{"sum_logits": -17.990623474121094, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -32.49632263183594, "logits_per_token": -2.998437245686849, "logits_per_char": -0.47343745984529195, "num_chars": 38}, {"sum_logits": -14.583902359008789, "num_tokens": 8, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -40.57039260864258, "logits_per_token": -1.8229877948760986, "logits_per_char": -0.34723577045259024, "num_chars": 42}, {"sum_logits": -23.848773956298828, "num_tokens": 11, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -46.502864837646484, "logits_per_token": -2.16807035966353, "logits_per_char": -0.4183995430929619, "num_chars": 57}, {"sum_logits": -32.34381866455078, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -52.27245330810547, "logits_per_token": -3.5937576293945312, "logits_per_char": -0.6738295555114746, "num_chars": 48}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 358, "native_id": "7-710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.470319747924805, "incorrect_loss_raw": 17.331087112426758, "correct_loss_per_char": 0.846535641214122, "incorrect_loss_per_char": 0.8003176684652514, "correct_loss_per_token": 4.867579936981201, "incorrect_loss_per_token": 3.706765143076579, "correct_loss_uncond": -15.383413314819336, "incorrect_loss_uncond": -11.332732518513998}, "model_output": [{"sum_logits": -14.432863235473633, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -28.39300537109375, "logits_per_token": -3.608215808868408, "logits_per_char": -0.8018257353040907, "num_chars": 18}, {"sum_logits": -19.470319747924805, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -34.85373306274414, "logits_per_token": -4.867579936981201, "logits_per_char": -0.846535641214122, "num_chars": 23}, {"sum_logits": -19.829984664916992, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -28.756183624267578, "logits_per_token": -3.9659969329833986, "logits_per_char": -0.7931993865966797, "num_chars": 25}, {"sum_logits": -17.73041343688965, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -28.842269897460938, "logits_per_token": -3.54608268737793, "logits_per_char": -0.805927883494984, "num_chars": 22}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 359, "native_id": "8-52", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 33.14241027832031, "incorrect_loss_raw": 30.876113891601562, "correct_loss_per_char": 1.0691100089780745, "incorrect_loss_per_char": 0.9204795563813297, "correct_loss_per_token": 4.142801284790039, "incorrect_loss_per_token": 4.353933581599483, "correct_loss_uncond": -16.32982635498047, "incorrect_loss_uncond": -7.705518086751302}, "model_output": [{"sum_logits": -31.483354568481445, "num_tokens": 6, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -38.69377136230469, "logits_per_token": -5.247225761413574, "logits_per_char": -1.0155920828542402, "num_chars": 31}, {"sum_logits": -33.14241027832031, "num_tokens": 8, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -49.47223663330078, "logits_per_token": -4.142801284790039, "logits_per_char": -1.0691100089780745, "num_chars": 31}, {"sum_logits": -32.15165710449219, "num_tokens": 7, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -41.238365173339844, "logits_per_token": -4.5930938720703125, "logits_per_char": -0.8931015862358941, "num_chars": 36}, {"sum_logits": -28.993330001831055, "num_tokens": 9, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -35.81275939941406, "logits_per_token": -3.221481111314562, "logits_per_char": -0.8527450000538546, "num_chars": 34}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 360, "native_id": "9-1167", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.7884631156921387, "incorrect_loss_raw": 6.321542104085286, "correct_loss_per_char": 0.47355788946151733, "incorrect_loss_per_char": 1.2245295418633355, "correct_loss_per_token": 3.7884631156921387, "incorrect_loss_per_token": 6.321542104085286, "correct_loss_uncond": -10.064362049102783, "incorrect_loss_uncond": -6.939334869384766}, "model_output": [{"sum_logits": -4.3058977127075195, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.778168678283691, "logits_per_token": -4.3058977127075195, "logits_per_char": -0.8611795425415039, "num_chars": 5}, {"sum_logits": -11.078629493713379, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.072437286376953, "logits_per_token": -11.078629493713379, "logits_per_char": -2.215725898742676, "num_chars": 5}, {"sum_logits": -3.580099105834961, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.932024955749512, "logits_per_token": -3.580099105834961, "logits_per_char": -0.5966831843058268, "num_chars": 6}, {"sum_logits": -3.7884631156921387, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.852825164794922, "logits_per_token": -3.7884631156921387, "logits_per_char": -0.47355788946151733, "num_chars": 8}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 361, "native_id": "8-43", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 21.04482650756836, "incorrect_loss_raw": 34.49587949117025, "correct_loss_per_char": 0.7014942169189453, "incorrect_loss_per_char": 1.1048522564488596, "correct_loss_per_token": 4.208965301513672, "incorrect_loss_per_token": 6.897600777943929, "correct_loss_uncond": -8.80615234375, "incorrect_loss_uncond": -2.528909683227539}, "model_output": [{"sum_logits": -27.493093490600586, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -28.51015853881836, "logits_per_token": -6.8732733726501465, "logits_per_char": -1.0997237396240234, "num_chars": 25}, {"sum_logits": -41.38140106201172, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -43.7265625, "logits_per_token": -6.896900177001953, "logits_per_char": -1.0610615656926081, "num_chars": 39}, {"sum_logits": -34.61314392089844, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -38.837646484375, "logits_per_token": -6.922628784179688, "logits_per_char": -1.153771464029948, "num_chars": 30}, {"sum_logits": -21.04482650756836, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -29.85097885131836, "logits_per_token": -4.208965301513672, "logits_per_char": -0.7014942169189453, "num_chars": 30}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 362, "native_id": "9-57", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.370084762573242, "incorrect_loss_raw": 8.097935676574707, "correct_loss_per_char": 0.7671549660818917, "incorrect_loss_per_char": 1.1877319211051576, "correct_loss_per_token": 5.370084762573242, "incorrect_loss_per_token": 8.097935676574707, "correct_loss_uncond": -6.397276878356934, "incorrect_loss_uncond": -4.636544863382976}, "model_output": [{"sum_logits": -11.214377403259277, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -13.72259521484375, "logits_per_token": -11.214377403259277, "logits_per_char": -1.4017971754074097, "num_chars": 8}, {"sum_logits": -5.370084762573242, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -11.767361640930176, "logits_per_token": -5.370084762573242, "logits_per_char": -0.7671549660818917, "num_chars": 7}, {"sum_logits": -7.95352840423584, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -13.184515953063965, "logits_per_token": -7.95352840423584, "logits_per_char": -1.1362183434622628, "num_chars": 7}, {"sum_logits": -5.125901222229004, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -11.296330451965332, "logits_per_token": -5.125901222229004, "logits_per_char": -1.0251802444458007, "num_chars": 5}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 363, "native_id": "1411", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.801020622253418, "incorrect_loss_raw": 15.192371050516764, "correct_loss_per_char": 0.6900510311126709, "incorrect_loss_per_char": 0.7856739524322137, "correct_loss_per_token": 4.600340207417806, "incorrect_loss_per_token": 6.329730192820231, "correct_loss_uncond": -11.595398902893066, "incorrect_loss_uncond": -8.990974108378092}, "model_output": [{"sum_logits": -13.801020622253418, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -25.396419525146484, "logits_per_token": -4.600340207417806, "logits_per_char": -0.6900510311126709, "num_chars": 20}, {"sum_logits": -22.79619598388672, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -27.58775520324707, "logits_per_token": -7.598731994628906, "logits_per_char": -1.1997997886256169, "num_chars": 19}, {"sum_logits": -12.050857543945312, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -21.133464813232422, "logits_per_token": -6.025428771972656, "logits_per_char": -0.6694920857747396, "num_chars": 18}, {"sum_logits": -10.730059623718262, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -23.828815460205078, "logits_per_token": -5.365029811859131, "logits_per_char": -0.4877299828962846, "num_chars": 22}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 364, "native_id": "9-206", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.030318260192871, "incorrect_loss_raw": 13.474278450012207, "correct_loss_per_char": 0.7715629430917593, "incorrect_loss_per_char": 0.9030154372463707, "correct_loss_per_token": 3.3434394200642905, "incorrect_loss_per_token": 4.625483512878418, "correct_loss_uncond": -7.036715507507324, "incorrect_loss_uncond": -5.070423444112142}, "model_output": [{"sum_logits": -14.145059585571289, "num_tokens": 4, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -21.18268585205078, "logits_per_token": -3.5362648963928223, "logits_per_char": -0.832062328563017, "num_chars": 17}, {"sum_logits": -9.485562324523926, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.70199203491211, "logits_per_token": -4.742781162261963, "logits_per_char": -0.6775401660374233, "num_chars": 14}, {"sum_logits": -10.030318260192871, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.067033767700195, "logits_per_token": -3.3434394200642905, "logits_per_char": -0.7715629430917593, "num_chars": 13}, {"sum_logits": -16.792213439941406, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.749427795410156, "logits_per_token": -5.597404479980469, "logits_per_char": -1.1994438171386719, "num_chars": 14}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 365, "native_id": "7-740", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 18.22633171081543, "incorrect_loss_raw": 18.626924514770508, "correct_loss_per_char": 0.8679205576578776, "incorrect_loss_per_char": 0.9816016311979193, "correct_loss_per_token": 3.0377219518025718, "incorrect_loss_per_token": 4.656731128692627, "correct_loss_uncond": -11.882345199584961, "incorrect_loss_uncond": -8.909833908081055}, "model_output": [{"sum_logits": -15.151407241821289, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -26.494670867919922, "logits_per_token": -3.7878518104553223, "logits_per_char": -0.7974424864116468, "num_chars": 19}, {"sum_logits": -18.552288055419922, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -26.86964225769043, "logits_per_token": -4.6380720138549805, "logits_per_char": -1.091311062083525, "num_chars": 17}, {"sum_logits": -22.177078247070312, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -29.245962142944336, "logits_per_token": -5.544269561767578, "logits_per_char": -1.0560513450985862, "num_chars": 21}, {"sum_logits": -18.22633171081543, "num_tokens": 6, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -30.10867691040039, "logits_per_token": -3.0377219518025718, "logits_per_char": -0.8679205576578776, "num_chars": 21}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 366, "native_id": "1774", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 41.87460708618164, "incorrect_loss_raw": 29.293659210205078, "correct_loss_per_char": 0.7900869261543706, "incorrect_loss_per_char": 0.7823873020353771, "correct_loss_per_token": 4.187460708618164, "incorrect_loss_per_token": 3.493390984005398, "correct_loss_uncond": -6.891696929931641, "incorrect_loss_uncond": -8.773192087809244}, "model_output": [{"sum_logits": -36.356346130371094, "num_tokens": 9, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -44.17609405517578, "logits_per_token": -4.039594014485677, "logits_per_char": -0.7574238777160645, "num_chars": 48}, {"sum_logits": -41.87460708618164, "num_tokens": 10, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -48.76630401611328, "logits_per_token": -4.187460708618164, "logits_per_char": -0.7900869261543706, "num_chars": 53}, {"sum_logits": -26.64547348022461, "num_tokens": 8, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -37.944541931152344, "logits_per_token": -3.330684185028076, "logits_per_char": -0.7011966705322266, "num_chars": 38}, {"sum_logits": -24.87915802001953, "num_tokens": 8, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -32.079917907714844, "logits_per_token": -3.1098947525024414, "logits_per_char": -0.8885413578578404, "num_chars": 28}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 367, "native_id": "7-93", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.744903564453125, "incorrect_loss_raw": 9.683489004770914, "correct_loss_per_char": 1.6744903564453124, "incorrect_loss_per_char": 0.6368577692243788, "correct_loss_per_token": 8.372451782226562, "incorrect_loss_per_token": 4.095204173194038, "correct_loss_uncond": -0.3758506774902344, "incorrect_loss_uncond": -6.466768105824788}, "model_output": [{"sum_logits": -7.9072747230529785, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.05129623413086, "logits_per_token": -2.6357582410176597, "logits_per_char": -0.4392930401696099, "num_chars": 18}, {"sum_logits": -6.776519775390625, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -13.479421615600586, "logits_per_token": -6.776519775390625, "logits_per_char": -0.7529466417100694, "num_chars": 9}, {"sum_logits": -16.744903564453125, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -17.12075424194336, "logits_per_token": -8.372451782226562, "logits_per_char": -1.6744903564453124, "num_chars": 10}, {"sum_logits": -14.36667251586914, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.920053482055664, "logits_per_token": -2.873334503173828, "logits_per_char": -0.718333625793457, "num_chars": 20}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 368, "native_id": "8-97", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.141294479370117, "incorrect_loss_raw": 23.799549738566082, "correct_loss_per_char": 0.4856517791748047, "incorrect_loss_per_char": 0.9661422098237678, "correct_loss_per_token": 3.0353236198425293, "incorrect_loss_per_token": 5.498096497853598, "correct_loss_uncond": -18.519176483154297, "incorrect_loss_uncond": -12.39437739054362}, "model_output": [{"sum_logits": -27.10745620727539, "num_tokens": 5, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -45.053466796875, "logits_per_token": -5.421491241455078, "logits_per_char": -1.042594469510592, "num_chars": 26}, {"sum_logits": -12.141294479370117, "num_tokens": 4, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -30.660470962524414, "logits_per_token": -3.0353236198425293, "logits_per_char": -0.4856517791748047, "num_chars": 25}, {"sum_logits": -16.617929458618164, "num_tokens": 4, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -27.296663284301758, "logits_per_token": -4.154482364654541, "logits_per_char": -0.8308964729309082, "num_chars": 20}, {"sum_logits": -27.673263549804688, "num_tokens": 4, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -36.231651306152344, "logits_per_token": -6.918315887451172, "logits_per_char": -1.0249356870298032, "num_chars": 27}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 369, "native_id": "9-813", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 32.28300476074219, "incorrect_loss_raw": 17.345102945963543, "correct_loss_per_char": 0.7337046536532316, "incorrect_loss_per_char": 0.8219341698640622, "correct_loss_per_token": 4.611857822963169, "incorrect_loss_per_token": 3.9883708424038358, "correct_loss_uncond": -10.977672576904297, "incorrect_loss_uncond": -5.211057027180989}, "model_output": [{"sum_logits": -11.706714630126953, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.902238210042318, "logits_per_char": -0.9005165100097656, "num_chars": 13}, {"sum_logits": -32.28300476074219, "num_tokens": 7, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -43.260677337646484, "logits_per_token": -4.611857822963169, "logits_per_char": -0.7337046536532316, "num_chars": 44}, {"sum_logits": -16.09730339050293, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -24.944107055664062, "logits_per_token": -4.024325847625732, "logits_per_char": -0.699882756108823, "num_chars": 23}, {"sum_logits": -24.231290817260742, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -30.76152801513672, "logits_per_token": -4.038548469543457, "logits_per_char": -0.865403243473598, "num_chars": 28}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 370, "native_id": "9-686", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.512176513671875, "incorrect_loss_raw": 9.157040278116861, "correct_loss_per_char": 1.0341451009114584, "incorrect_loss_per_char": 1.203006244841076, "correct_loss_per_token": 5.170725504557292, "incorrect_loss_per_token": 9.157040278116861, "correct_loss_uncond": -9.090614318847656, "incorrect_loss_uncond": -4.195768674214681}, "model_output": [{"sum_logits": -9.395312309265137, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.943110466003418, "logits_per_token": -9.395312309265137, "logits_per_char": -1.174414038658142, "num_chars": 8}, {"sum_logits": -15.512176513671875, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -24.60279083251953, "logits_per_token": -5.170725504557292, "logits_per_char": -1.0341451009114584, "num_chars": 15}, {"sum_logits": -9.80720329284668, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.184250831604004, "logits_per_token": -9.80720329284668, "logits_per_char": -1.40102904183524, "num_chars": 7}, {"sum_logits": -8.26860523223877, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.931065559387207, "logits_per_token": -8.26860523223877, "logits_per_char": -1.0335756540298462, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 371, "native_id": "9-799", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.5646843910217285, "incorrect_loss_raw": 8.32503334681193, "correct_loss_per_char": 0.42805264546320987, "incorrect_loss_per_char": 1.4368544941856747, "correct_loss_per_token": 2.7823421955108643, "incorrect_loss_per_token": 8.32503334681193, "correct_loss_uncond": -11.205249309539795, "incorrect_loss_uncond": -5.4256313641866045}, "model_output": [{"sum_logits": -5.148592948913574, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.802062034606934, "logits_per_token": -5.148592948913574, "logits_per_char": -1.0297185897827148, "num_chars": 5}, {"sum_logits": -11.977989196777344, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.478782653808594, "logits_per_token": -11.977989196777344, "logits_per_char": -1.7111413138253349, "num_chars": 7}, {"sum_logits": -5.5646843910217285, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.769933700561523, "logits_per_token": -2.7823421955108643, "logits_per_char": -0.42805264546320987, "num_chars": 13}, {"sum_logits": -7.848517894744873, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.971149444580078, "logits_per_token": -7.848517894744873, "logits_per_char": -1.5697035789489746, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 372, "native_id": "1179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 45.28023147583008, "incorrect_loss_raw": 25.026621500651043, "correct_loss_per_char": 1.029096169905229, "incorrect_loss_per_char": 0.7827667949175594, "correct_loss_per_token": 5.031136830647786, "incorrect_loss_per_token": 3.4898285335964623, "correct_loss_uncond": -9.912151336669922, "incorrect_loss_uncond": -8.011818567911783}, "model_output": [{"sum_logits": -22.563018798828125, "num_tokens": 8, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -29.508150100708008, "logits_per_token": -2.8203773498535156, "logits_per_char": -0.7050943374633789, "num_chars": 32}, {"sum_logits": -45.28023147583008, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -55.1923828125, "logits_per_token": -5.031136830647786, "logits_per_char": -1.029096169905229, "num_chars": 44}, {"sum_logits": -26.488784790039062, "num_tokens": 8, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -33.21732711791992, "logits_per_token": -3.311098098754883, "logits_per_char": -0.8544769287109375, "num_chars": 31}, {"sum_logits": -26.028060913085938, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -36.38984298706055, "logits_per_token": -4.338010152180989, "logits_per_char": -0.7887291185783617, "num_chars": 33}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 373, "native_id": "1954", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 35.53950500488281, "incorrect_loss_raw": 34.384480794270836, "correct_loss_per_char": 0.740406354268392, "incorrect_loss_per_char": 1.0438799769915692, "correct_loss_per_token": 5.0770721435546875, "incorrect_loss_per_token": 5.594038009643555, "correct_loss_uncond": -10.492755889892578, "incorrect_loss_uncond": -8.088246663411459}, "model_output": [{"sum_logits": -35.53950500488281, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -46.03226089477539, "logits_per_token": -5.0770721435546875, "logits_per_char": -0.740406354268392, "num_chars": 48}, {"sum_logits": -33.86343002319336, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -35.361061096191406, "logits_per_token": -6.772686004638672, "logits_per_char": -0.9152278384646854, "num_chars": 37}, {"sum_logits": -36.93377685546875, "num_tokens": 8, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -46.37689971923828, "logits_per_token": -4.616722106933594, "logits_per_char": -0.971941496196546, "num_chars": 38}, {"sum_logits": -32.35623550415039, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -45.68022155761719, "logits_per_token": -5.392705917358398, "logits_per_char": -1.2444705963134766, "num_chars": 26}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 374, "native_id": "8-403", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.541378021240234, "incorrect_loss_raw": 15.226455688476562, "correct_loss_per_char": 0.8877983093261719, "incorrect_loss_per_char": 0.6317775073483834, "correct_loss_per_token": 3.847126007080078, "incorrect_loss_per_token": 2.758093388875326, "correct_loss_uncond": -0.4214668273925781, "incorrect_loss_uncond": -9.606170654296875}, "model_output": [{"sum_logits": -7.897097587585449, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -19.670574188232422, "logits_per_token": -1.5794195175170898, "logits_per_char": -0.5264731725056966, "num_chars": 15}, {"sum_logits": -25.847797393798828, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -31.44554901123047, "logits_per_token": -4.307966232299805, "logits_per_char": -0.9573258293999566, "num_chars": 27}, {"sum_logits": -11.541378021240234, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.847126007080078, "logits_per_char": -0.8877983093261719, "num_chars": 13}, {"sum_logits": -11.93447208404541, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -23.381755828857422, "logits_per_token": -2.386894416809082, "logits_per_char": -0.4115335201394969, "num_chars": 29}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 375, "native_id": "9-576", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.9972028732299805, "incorrect_loss_raw": 10.064030329386393, "correct_loss_per_char": 0.7774669859144423, "incorrect_loss_per_char": 1.7010351847088527, "correct_loss_per_token": 6.9972028732299805, "incorrect_loss_per_token": 10.064030329386393, "correct_loss_uncond": -6.536572456359863, "incorrect_loss_uncond": -1.8407360712687175}, "model_output": [{"sum_logits": -6.9972028732299805, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -13.533775329589844, "logits_per_token": -6.9972028732299805, "logits_per_char": -0.7774669859144423, "num_chars": 9}, {"sum_logits": -10.957456588745117, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -13.08056926727295, "logits_per_token": -10.957456588745117, "logits_per_char": -2.1914913177490236, "num_chars": 5}, {"sum_logits": -12.354642868041992, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -12.74473762512207, "logits_per_token": -12.354642868041992, "logits_per_char": -1.7649489811488561, "num_chars": 7}, {"sum_logits": -6.87999153137207, "num_tokens": 1, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -6.87999153137207, "logits_per_char": -1.1466652552286785, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 376, "native_id": "9-866", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 18.018041610717773, "incorrect_loss_raw": 18.15083948771159, "correct_loss_per_char": 0.8190018913962624, "incorrect_loss_per_char": 0.7628448724746705, "correct_loss_per_token": 4.504510402679443, "incorrect_loss_per_token": 3.802272733052572, "correct_loss_uncond": -12.475345611572266, "incorrect_loss_uncond": -7.254432042439778}, "model_output": [{"sum_logits": -21.369861602783203, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -25.748199462890625, "logits_per_token": -4.2739723205566404, "logits_per_char": -0.8904109001159668, "num_chars": 24}, {"sum_logits": -18.018041610717773, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -30.49338722229004, "logits_per_token": -4.504510402679443, "logits_per_char": -0.8190018913962624, "num_chars": 22}, {"sum_logits": -19.428836822509766, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -28.42322540283203, "logits_per_token": -4.857209205627441, "logits_per_char": -0.9714418411254883, "num_chars": 20}, {"sum_logits": -13.653820037841797, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -22.044389724731445, "logits_per_token": -2.275636672973633, "logits_per_char": -0.42668187618255615, "num_chars": 32}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 377, "native_id": "7-208", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 31.79576873779297, "incorrect_loss_raw": 20.434187571207683, "correct_loss_per_char": 0.9936177730560303, "incorrect_loss_per_char": 0.7610850817141358, "correct_loss_per_token": 4.542252676827567, "incorrect_loss_per_token": 3.2387899126325337, "correct_loss_uncond": -9.375556945800781, "incorrect_loss_uncond": -10.469034830729166}, "model_output": [{"sum_logits": -16.78006362915039, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -29.064754486083984, "logits_per_token": -3.356012725830078, "logits_per_char": -0.9322257571750217, "num_chars": 18}, {"sum_logits": -23.686187744140625, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -31.807403564453125, "logits_per_token": -3.3837411063058034, "logits_per_char": -0.8167650946255388, "num_chars": 29}, {"sum_logits": -31.79576873779297, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -41.17132568359375, "logits_per_token": -4.542252676827567, "logits_per_char": -0.9936177730560303, "num_chars": 32}, {"sum_logits": -20.83631134033203, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -31.837509155273438, "logits_per_token": -2.9766159057617188, "logits_per_char": -0.5342643933418469, "num_chars": 39}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 378, "native_id": "9-771", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.429178237915039, "incorrect_loss_raw": 8.853447755177816, "correct_loss_per_char": 0.8791675567626953, "incorrect_loss_per_char": 1.587070792692679, "correct_loss_per_token": 3.809726079305013, "incorrect_loss_per_token": 7.094909191131592, "correct_loss_uncond": -4.883142471313477, "incorrect_loss_uncond": -0.8436853090922037}, "model_output": [{"sum_logits": -8.268492698669434, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.396711349487305, "logits_per_token": -8.268492698669434, "logits_per_char": -1.6536985397338868, "num_chars": 5}, {"sum_logits": -7.74061918258667, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -8.581192016601562, "logits_per_token": -7.74061918258667, "logits_per_char": -1.9351547956466675, "num_chars": 4}, {"sum_logits": -10.551231384277344, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.113495826721191, "logits_per_token": -5.275615692138672, "logits_per_char": -1.1723590426974826, "num_chars": 9}, {"sum_logits": -11.429178237915039, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.312320709228516, "logits_per_token": -3.809726079305013, "logits_per_char": -0.8791675567626953, "num_chars": 13}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 379, "native_id": "998", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 44.4936408996582, "incorrect_loss_raw": 39.11059824625651, "correct_loss_per_char": 0.7176393693493258, "incorrect_loss_per_char": 0.838739065164383, "correct_loss_per_token": 2.96624272664388, "incorrect_loss_per_token": 3.712654707166884, "correct_loss_uncond": -4.304958343505859, "incorrect_loss_uncond": -4.71694819132487}, "model_output": [{"sum_logits": -30.323043823242188, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -33.856048583984375, "logits_per_token": -3.369227091471354, "logits_per_char": -0.7979748374537418, "num_chars": 38}, {"sum_logits": -55.92828369140625, "num_tokens": 12, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -61.78715515136719, "logits_per_token": -4.6606903076171875, "logits_per_char": -1.0966330135569853, "num_chars": 51}, {"sum_logits": -44.4936408996582, "num_tokens": 15, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -48.79859924316406, "logits_per_token": -2.96624272664388, "logits_per_char": -0.7176393693493258, "num_chars": 62}, {"sum_logits": -31.080467224121094, "num_tokens": 10, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -35.83943557739258, "logits_per_token": -3.1080467224121096, "logits_per_char": -0.6216093444824219, "num_chars": 50}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 380, "native_id": "433", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.630584716796875, "incorrect_loss_raw": 22.27350362141927, "correct_loss_per_char": 1.2368065970284599, "incorrect_loss_per_char": 0.9140764110778138, "correct_loss_per_token": 5.7717641194661455, "incorrect_loss_per_token": 4.454700724283854, "correct_loss_uncond": -9.424652099609375, "incorrect_loss_uncond": -5.479346593221028}, "model_output": [{"sum_logits": -18.40043830871582, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -27.970090866088867, "logits_per_token": -3.680087661743164, "logits_per_char": -0.681497715137623, "num_chars": 27}, {"sum_logits": -34.630584716796875, "num_tokens": 6, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -44.05523681640625, "logits_per_token": -5.7717641194661455, "logits_per_char": -1.2368065970284599, "num_chars": 28}, {"sum_logits": -23.86212921142578, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -25.881587982177734, "logits_per_token": -4.772425842285156, "logits_per_char": -1.0374838787576426, "num_chars": 23}, {"sum_logits": -24.55794334411621, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -29.406871795654297, "logits_per_token": -4.911588668823242, "logits_per_char": -1.0232476393381755, "num_chars": 24}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 381, "native_id": "9-508", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.400772094726562, "incorrect_loss_raw": 30.17447280883789, "correct_loss_per_char": 1.0636714588512073, "incorrect_loss_per_char": 0.9202299963000189, "correct_loss_per_token": 3.9001286824544272, "incorrect_loss_per_token": 6.644570350646973, "correct_loss_uncond": -10.7708740234375, "incorrect_loss_uncond": -6.326524098714192}, "model_output": [{"sum_logits": -23.400772094726562, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -34.17164611816406, "logits_per_token": -3.9001286824544272, "logits_per_char": -1.0636714588512073, "num_chars": 22}, {"sum_logits": -27.24618911743164, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -32.622859954833984, "logits_per_token": -5.449237823486328, "logits_per_char": -0.8256420944676255, "num_chars": 33}, {"sum_logits": -36.58054733276367, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -40.33397674560547, "logits_per_token": -9.145136833190918, "logits_per_char": -1.0451584952218191, "num_chars": 35}, {"sum_logits": -26.69668197631836, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -36.5461540222168, "logits_per_token": -5.339336395263672, "logits_per_char": -0.8898893992106119, "num_chars": 30}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 382, "native_id": "7-561", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.700584888458252, "incorrect_loss_raw": 7.99102258682251, "correct_loss_per_char": 0.675146222114563, "incorrect_loss_per_char": 1.2710075151352656, "correct_loss_per_token": 2.700584888458252, "incorrect_loss_per_token": 7.99102258682251, "correct_loss_uncond": -8.47200632095337, "incorrect_loss_uncond": -3.929638067881266}, "model_output": [{"sum_logits": -7.664527416229248, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.313104629516602, "logits_per_token": -7.664527416229248, "logits_per_char": -1.0949324880327498, "num_chars": 7}, {"sum_logits": -12.11391544342041, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.559885025024414, "logits_per_token": -12.11391544342041, "logits_per_char": -2.018985907236735, "num_chars": 6}, {"sum_logits": -4.194624900817871, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.194624900817871, "logits_per_char": -0.6991041501363119, "num_chars": 6}, {"sum_logits": -2.700584888458252, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -2.700584888458252, "logits_per_char": -0.675146222114563, "num_chars": 4}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 383, "native_id": "7-976", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.93216323852539, "incorrect_loss_raw": 15.434247175852457, "correct_loss_per_char": 0.7696437835693359, "incorrect_loss_per_char": 0.5572842487831737, "correct_loss_per_token": 3.386432647705078, "incorrect_loss_per_token": 3.7531466484069824, "correct_loss_uncond": -12.910417556762695, "incorrect_loss_uncond": -13.17032257715861}, "model_output": [{"sum_logits": -16.93216323852539, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -29.842580795288086, "logits_per_token": -3.386432647705078, "logits_per_char": -0.7696437835693359, "num_chars": 22}, {"sum_logits": -6.32490873336792, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -21.73571014404297, "logits_per_token": -1.264981746673584, "logits_per_char": -0.22588959762028285, "num_chars": 28}, {"sum_logits": -16.94418716430664, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -27.99277114868164, "logits_per_token": -4.23604679107666, "logits_per_char": -0.6516995063194861, "num_chars": 26}, {"sum_logits": -23.033645629882812, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -36.085227966308594, "logits_per_token": -5.758411407470703, "logits_per_char": -0.7942636424097521, "num_chars": 29}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 384, "native_id": "1635", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.243182182312012, "incorrect_loss_raw": 10.049790382385254, "correct_loss_per_char": 1.0347403117588587, "incorrect_loss_per_char": 1.538413828390616, "correct_loss_per_token": 7.243182182312012, "incorrect_loss_per_token": 7.17422538333469, "correct_loss_uncond": -3.815481185913086, "incorrect_loss_uncond": -3.888650894165039}, "model_output": [{"sum_logits": -7.932202339172363, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.679478645324707, "logits_per_token": -7.932202339172363, "logits_per_char": -1.3220337231953938, "num_chars": 6}, {"sum_logits": -12.940042495727539, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -17.06340789794922, "logits_per_token": -4.313347498575847, "logits_per_char": -1.4377824995252821, "num_chars": 9}, {"sum_logits": -7.243182182312012, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -11.058663368225098, "logits_per_token": -7.243182182312012, "logits_per_char": -1.0347403117588587, "num_chars": 7}, {"sum_logits": -9.27712631225586, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.072437286376953, "logits_per_token": -9.27712631225586, "logits_per_char": -1.8554252624511718, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 385, "native_id": "7-875", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.63351821899414, "incorrect_loss_raw": 11.283381144205729, "correct_loss_per_char": 0.8757743835449219, "incorrect_loss_per_char": 1.4013896548558795, "correct_loss_per_token": 3.2111727396647134, "incorrect_loss_per_token": 5.915280289120144, "correct_loss_uncond": -8.951904296875, "incorrect_loss_uncond": -8.193795204162598}, "model_output": [{"sum_logits": -9.63351821899414, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -18.58542251586914, "logits_per_token": -3.2111727396647134, "logits_per_char": -0.8757743835449219, "num_chars": 11}, {"sum_logits": -17.920856475830078, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -27.7282772064209, "logits_per_token": -5.973618825276692, "logits_per_char": -1.493404706319173, "num_chars": 12}, {"sum_logits": -7.615157127380371, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -15.004511833190918, "logits_per_token": -7.615157127380371, "logits_per_char": -1.5230314254760742, "num_chars": 5}, {"sum_logits": -8.314129829406738, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -15.698740005493164, "logits_per_token": -4.157064914703369, "logits_per_char": -1.1877328327723913, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 386, "native_id": "7-1053", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.639205932617188, "incorrect_loss_raw": 15.018375714619955, "correct_loss_per_char": 1.0683002471923828, "incorrect_loss_per_char": 0.6399108238757605, "correct_loss_per_token": 5.127841186523438, "incorrect_loss_per_token": 3.1216573291354712, "correct_loss_uncond": -15.973838806152344, "incorrect_loss_uncond": -16.320160230000813}, "model_output": [{"sum_logits": -16.662281036376953, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -32.74390411376953, "logits_per_token": -4.165570259094238, "logits_per_char": -0.757376410744407, "num_chars": 22}, {"sum_logits": -25.639205932617188, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -41.61304473876953, "logits_per_token": -5.127841186523438, "logits_per_char": -1.0683002471923828, "num_chars": 24}, {"sum_logits": -14.017821311950684, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -30.495243072509766, "logits_per_token": -2.803564262390137, "logits_per_char": -0.6094704918239428, "num_chars": 23}, {"sum_logits": -14.375024795532227, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -30.776460647583008, "logits_per_token": -2.3958374659220376, "logits_per_char": -0.5528855690589318, "num_chars": 26}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 387, "native_id": "9-957", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.874079704284668, "incorrect_loss_raw": 10.306784311930338, "correct_loss_per_char": 0.7421299815177917, "incorrect_loss_per_char": 0.6558994486297801, "correct_loss_per_token": 2.968519926071167, "incorrect_loss_per_token": 3.243162049187554, "correct_loss_uncond": -3.9633588790893555, "incorrect_loss_uncond": -2.9726343154907227}, "model_output": [{"sum_logits": -12.858911514282227, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -16.35980224609375, "logits_per_token": -2.1431519190470376, "logits_per_char": -0.4945735197800856, "num_chars": 26}, {"sum_logits": -8.666318893432617, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.013801574707031, "logits_per_token": -2.888772964477539, "logits_per_char": -0.6190227781023298, "num_chars": 14}, {"sum_logits": -9.395122528076172, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.464652061462402, "logits_per_token": -4.697561264038086, "logits_per_char": -0.8541020480069247, "num_chars": 11}, {"sum_logits": -11.874079704284668, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -15.837438583374023, "logits_per_token": -2.968519926071167, "logits_per_char": -0.7421299815177917, "num_chars": 16}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 388, "native_id": "1150", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.847761154174805, "incorrect_loss_raw": 8.409929752349854, "correct_loss_per_char": 1.1319829395839147, "incorrect_loss_per_char": 1.0642471760726422, "correct_loss_per_token": 15.847761154174805, "incorrect_loss_per_token": 8.409929752349854, "correct_loss_uncond": -2.2983150482177734, "incorrect_loss_uncond": -4.246478875478108}, "model_output": [{"sum_logits": -7.905920028686523, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.893747329711914, "logits_per_token": -7.905920028686523, "logits_per_char": -1.3176533381144206, "num_chars": 6}, {"sum_logits": -5.778676509857178, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.867145538330078, "logits_per_token": -5.778676509857178, "logits_per_char": -0.8255252156938825, "num_chars": 7}, {"sum_logits": -15.847761154174805, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.146076202392578, "logits_per_token": -15.847761154174805, "logits_per_char": -1.1319829395839147, "num_chars": 14}, {"sum_logits": -11.54519271850586, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.208333015441895, "logits_per_token": -11.54519271850586, "logits_per_char": -1.0495629744096235, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 389, "native_id": "8-240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2824809551239014, "incorrect_loss_raw": 11.335685094197592, "correct_loss_per_char": 0.32606870787484304, "incorrect_loss_per_char": 2.2953713946872285, "correct_loss_per_token": 2.2824809551239014, "incorrect_loss_per_token": 11.335685094197592, "correct_loss_uncond": -10.162786722183228, "incorrect_loss_uncond": -2.229802449544271}, "model_output": [{"sum_logits": -2.2824809551239014, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": true, "sum_logits_uncond": -12.445267677307129, "logits_per_token": -2.2824809551239014, "logits_per_char": -0.32606870787484304, "num_chars": 7}, {"sum_logits": -11.367733001708984, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.923983573913574, "logits_per_token": -11.367733001708984, "logits_per_char": -2.841933250427246, "num_chars": 4}, {"sum_logits": -14.510505676269531, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.99431037902832, "logits_per_token": -14.510505676269531, "logits_per_char": -2.4184176127115884, "num_chars": 6}, {"sum_logits": -8.128816604614258, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.778168678283691, "logits_per_token": -8.128816604614258, "logits_per_char": -1.6257633209228515, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 390, "native_id": "9-554", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.970930099487305, "incorrect_loss_raw": 15.211015701293945, "correct_loss_per_char": 0.34634551223443477, "incorrect_loss_per_char": 0.44492452051125325, "correct_loss_per_token": 2.121366262435913, "incorrect_loss_per_token": 2.4801642100016275, "correct_loss_uncond": -19.400121688842773, "incorrect_loss_uncond": -23.500879923502605}, "model_output": [{"sum_logits": -13.984048843383789, "num_tokens": 7, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -31.930734634399414, "logits_per_token": -1.9977212633405412, "logits_per_char": -0.4112955542171703, "num_chars": 34}, {"sum_logits": -15.522994995117188, "num_tokens": 7, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -41.9511604309082, "logits_per_token": -2.2175707135881697, "logits_per_char": -0.4195404052734375, "num_chars": 37}, {"sum_logits": -16.970930099487305, "num_tokens": 8, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -36.37105178833008, "logits_per_token": -2.121366262435913, "logits_per_char": -0.34634551223443477, "num_chars": 49}, {"sum_logits": -16.12600326538086, "num_tokens": 5, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -42.25379180908203, "logits_per_token": -3.225200653076172, "logits_per_char": -0.5039376020431519, "num_chars": 32}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 391, "native_id": "9-135", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 19.353851318359375, "incorrect_loss_raw": 18.181894302368164, "correct_loss_per_char": 0.8064104715983073, "incorrect_loss_per_char": 0.9487543720362076, "correct_loss_per_token": 3.870770263671875, "incorrect_loss_per_token": 3.980265564388699, "correct_loss_uncond": -12.991172790527344, "incorrect_loss_uncond": -8.472625096638998}, "model_output": [{"sum_logits": -20.347488403320312, "num_tokens": 6, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -26.567873001098633, "logits_per_token": -3.3912480672200522, "logits_per_char": -0.8846734088400136, "num_chars": 23}, {"sum_logits": -19.353851318359375, "num_tokens": 5, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -32.34502410888672, "logits_per_token": -3.870770263671875, "logits_per_char": -0.8064104715983073, "num_chars": 24}, {"sum_logits": -18.87714385986328, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -28.019363403320312, "logits_per_token": -4.71928596496582, "logits_per_char": -1.1104202270507812, "num_chars": 17}, {"sum_logits": -15.321050643920898, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -25.37632179260254, "logits_per_token": -3.8302626609802246, "logits_per_char": -0.8511694802178277, "num_chars": 18}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 392, "native_id": "7-1096", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.57845687866211, "incorrect_loss_raw": 8.50752337773641, "correct_loss_per_char": 1.0723071098327637, "incorrect_loss_per_char": 1.5662965854008994, "correct_loss_per_token": 8.57845687866211, "incorrect_loss_per_token": 5.250944296518962, "correct_loss_uncond": -5.839754104614258, "incorrect_loss_uncond": -5.010167916615804}, "model_output": [{"sum_logits": -7.928764343261719, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -13.394623756408691, "logits_per_token": -3.9643821716308594, "logits_per_char": -0.8809738159179688, "num_chars": 9}, {"sum_logits": -8.57845687866211, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.418210983276367, "logits_per_token": -8.57845687866211, "logits_per_char": -1.0723071098327637, "num_chars": 8}, {"sum_logits": -11.610710144042969, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -15.119565963745117, "logits_per_token": -5.805355072021484, "logits_per_char": -2.3221420288085937, "num_chars": 5}, {"sum_logits": -5.983095645904541, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -12.038884162902832, "logits_per_token": -5.983095645904541, "logits_per_char": -1.4957739114761353, "num_chars": 4}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 393, "native_id": "841", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.456254482269287, "incorrect_loss_raw": 6.939536412556966, "correct_loss_per_char": 0.4320318102836609, "incorrect_loss_per_char": 1.0654321216401599, "correct_loss_per_token": 3.456254482269287, "incorrect_loss_per_token": 4.8640325334337025, "correct_loss_uncond": -9.892354488372803, "incorrect_loss_uncond": -7.245494842529297}, "model_output": [{"sum_logits": -4.671501159667969, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.534984588623047, "logits_per_token": -4.671501159667969, "logits_per_char": -0.6673573085239956, "num_chars": 7}, {"sum_logits": -9.339767456054688, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.359185218811035, "logits_per_token": -3.113255818684896, "logits_per_char": -1.167470932006836, "num_chars": 8}, {"sum_logits": -3.456254482269287, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.34860897064209, "logits_per_token": -3.456254482269287, "logits_per_char": -0.4320318102836609, "num_chars": 8}, {"sum_logits": -6.807340621948242, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -14.660923957824707, "logits_per_token": -6.807340621948242, "logits_per_char": -1.3614681243896485, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 394, "native_id": "7-146", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.954171657562256, "incorrect_loss_raw": 9.089541753133139, "correct_loss_per_char": 0.5412883325056597, "incorrect_loss_per_char": 0.7523906544001414, "correct_loss_per_token": 2.977085828781128, "incorrect_loss_per_token": 4.544770876566569, "correct_loss_uncond": -10.523470401763916, "incorrect_loss_uncond": -9.851105372111002}, "model_output": [{"sum_logits": -5.954171657562256, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.477642059326172, "logits_per_token": -2.977085828781128, "logits_per_char": -0.5412883325056597, "num_chars": 11}, {"sum_logits": -8.929424285888672, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.801837921142578, "logits_per_token": -4.464712142944336, "logits_per_char": -0.8117658441716974, "num_chars": 11}, {"sum_logits": -12.926258087158203, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -18.958728790283203, "logits_per_token": -6.463129043579102, "logits_per_char": -0.9943275451660156, "num_chars": 13}, {"sum_logits": -5.412942886352539, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -18.06137466430664, "logits_per_token": -2.7064714431762695, "logits_per_char": -0.4510785738627116, "num_chars": 12}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 395, "native_id": "1554", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.35398864746094, "incorrect_loss_raw": 26.195220947265625, "correct_loss_per_char": 0.9542774624294705, "incorrect_loss_per_char": 0.7700945449350906, "correct_loss_per_token": 4.907712663922991, "incorrect_loss_per_token": 3.757966283767942, "correct_loss_uncond": -14.377235412597656, "incorrect_loss_uncond": -14.905672709147135}, "model_output": [{"sum_logits": -32.238243103027344, "num_tokens": 8, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -40.05036926269531, "logits_per_token": -4.029780387878418, "logits_per_char": -0.7164054022894966, "num_chars": 45}, {"sum_logits": -26.16845703125, "num_tokens": 6, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -45.042476654052734, "logits_per_token": -4.361409505208333, "logits_per_char": -0.8177642822265625, "num_chars": 32}, {"sum_logits": -20.17896270751953, "num_tokens": 7, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -38.209835052490234, "logits_per_token": -2.882708958217076, "logits_per_char": -0.7761139502892127, "num_chars": 26}, {"sum_logits": -34.35398864746094, "num_tokens": 7, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -48.731224060058594, "logits_per_token": -4.907712663922991, "logits_per_char": -0.9542774624294705, "num_chars": 36}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 396, "native_id": "9-731", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.178526878356934, "incorrect_loss_raw": 12.724111874898275, "correct_loss_per_char": 0.5982105731964111, "incorrect_loss_per_char": 1.0587889989217123, "correct_loss_per_token": 7.178526878356934, "incorrect_loss_per_token": 9.078838030497232, "correct_loss_uncond": -5.977656364440918, "incorrect_loss_uncond": -1.5607481002807617}, "model_output": [{"sum_logits": -7.178526878356934, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.156183242797852, "logits_per_token": -7.178526878356934, "logits_per_char": -0.5982105731964111, "num_chars": 12}, {"sum_logits": -16.403732299804688, "num_tokens": 3, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -16.17844009399414, "logits_per_token": -5.4679107666015625, "logits_per_char": -1.0935821533203125, "num_chars": 15}, {"sum_logits": -11.420299530029297, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -13.692340850830078, "logits_per_token": -11.420299530029297, "logits_per_char": -1.1420299530029296, "num_chars": 10}, {"sum_logits": -10.34830379486084, "num_tokens": 1, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -12.98379898071289, "logits_per_token": -10.34830379486084, "logits_per_char": -0.9407548904418945, "num_chars": 11}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 397, "native_id": "1780", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.530045509338379, "incorrect_loss_raw": 12.841049830118815, "correct_loss_per_char": 2.3060091018676756, "incorrect_loss_per_char": 1.858362314436171, "correct_loss_per_token": 5.7650227546691895, "incorrect_loss_per_token": 8.784777535332575, "correct_loss_uncond": -0.5107488632202148, "incorrect_loss_uncond": -0.45433775583902997}, "model_output": [{"sum_logits": -11.530045509338379, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.040794372558594, "logits_per_token": -5.7650227546691895, "logits_per_char": -2.3060091018676756, "num_chars": 5}, {"sum_logits": -8.502117156982422, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -10.957900047302246, "logits_per_token": -8.502117156982422, "logits_per_char": -1.7004234313964843, "num_chars": 5}, {"sum_logits": -18.253225326538086, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -18.618206024169922, "logits_per_token": -6.084408442179362, "logits_per_char": -1.5211021105448406, "num_chars": 12}, {"sum_logits": -11.767807006835938, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -10.310056686401367, "logits_per_token": -11.767807006835938, "logits_per_char": -2.3535614013671875, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 398, "native_id": "7-1077", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 30.212162017822266, "incorrect_loss_raw": 25.583332061767578, "correct_loss_per_char": 0.8392267227172852, "incorrect_loss_per_char": 1.0028109289653504, "correct_loss_per_token": 3.776520252227783, "incorrect_loss_per_token": 4.540194659762912, "correct_loss_uncond": -8.491912841796875, "incorrect_loss_uncond": -5.490049997965495}, "model_output": [{"sum_logits": -24.867538452148438, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -33.33319091796875, "logits_per_token": -4.973507690429687, "logits_per_char": -1.0811973240064539, "num_chars": 23}, {"sum_logits": -22.602794647216797, "num_tokens": 6, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -29.590648651123047, "logits_per_token": -3.7671324412027993, "logits_per_char": -0.9827302020529042, "num_chars": 23}, {"sum_logits": -29.2796630859375, "num_tokens": 6, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -30.296306610107422, "logits_per_token": -4.87994384765625, "logits_per_char": -0.9445052608366935, "num_chars": 31}, {"sum_logits": -30.212162017822266, "num_tokens": 8, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -38.70407485961914, "logits_per_token": -3.776520252227783, "logits_per_char": -0.8392267227172852, "num_chars": 36}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 399, "native_id": "8-494", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.077028274536133, "incorrect_loss_raw": 17.505640029907227, "correct_loss_per_char": 0.5830014881334806, "incorrect_loss_per_char": 0.9175604941352965, "correct_loss_per_token": 2.2154056549072267, "incorrect_loss_per_token": 3.963669220606486, "correct_loss_uncond": -7.826398849487305, "incorrect_loss_uncond": -4.610365549723308}, "model_output": [{"sum_logits": -11.016804695129395, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.6722682317097983, "logits_per_char": -0.786914621080671, "num_chars": 14}, {"sum_logits": -15.624642372131348, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -22.41411018371582, "logits_per_token": -3.906160593032837, "logits_per_char": -1.0416428248087566, "num_chars": 15}, {"sum_logits": -11.077028274536133, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -18.903427124023438, "logits_per_token": -2.2154056549072267, "logits_per_char": -0.5830014881334806, "num_chars": 19}, {"sum_logits": -25.875473022460938, "num_tokens": 6, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -31.165599822998047, "logits_per_token": -4.312578837076823, "logits_per_char": -0.924124036516462, "num_chars": 28}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 400, "native_id": "936", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.296156883239746, "incorrect_loss_raw": 7.83101240793864, "correct_loss_per_char": 1.6592313766479492, "incorrect_loss_per_char": 1.3255129465981135, "correct_loss_per_token": 8.296156883239746, "incorrect_loss_per_token": 7.83101240793864, "correct_loss_uncond": -4.745561599731445, "incorrect_loss_uncond": -5.144240379333496}, "model_output": [{"sum_logits": -8.296156883239746, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.041718482971191, "logits_per_token": -8.296156883239746, "logits_per_char": -1.6592313766479492, "num_chars": 5}, {"sum_logits": -8.434117317199707, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.892477989196777, "logits_per_token": -8.434117317199707, "logits_per_char": -1.204873902457101, "num_chars": 7}, {"sum_logits": -7.203571319580078, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.778532028198242, "logits_per_token": -7.203571319580078, "logits_per_char": -1.200595219930013, "num_chars": 6}, {"sum_logits": -7.855348587036133, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.254748344421387, "logits_per_token": -7.855348587036133, "logits_per_char": -1.5710697174072266, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 401, "native_id": "8-478", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.871116638183594, "incorrect_loss_raw": 10.232722600301107, "correct_loss_per_char": 1.0670089721679688, "incorrect_loss_per_char": 0.8264882124175585, "correct_loss_per_token": 6.935558319091797, "incorrect_loss_per_token": 3.0696005556318497, "correct_loss_uncond": -8.230968475341797, "incorrect_loss_uncond": -4.646956443786621}, "model_output": [{"sum_logits": -11.200634002685547, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -17.340782165527344, "logits_per_token": -3.733544667561849, "logits_per_char": -0.8000452859061105, "num_chars": 14}, {"sum_logits": -13.871116638183594, "num_tokens": 2, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -22.10208511352539, "logits_per_token": -6.935558319091797, "logits_per_char": -1.0670089721679688, "num_chars": 13}, {"sum_logits": -7.210482597351074, "num_tokens": 3, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -11.630830764770508, "logits_per_token": -2.403494199117025, "logits_per_char": -0.6554984179410067, "num_chars": 11}, {"sum_logits": -12.2870512008667, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -15.667424201965332, "logits_per_token": -3.071762800216675, "logits_per_char": -1.0239209334055583, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 402, "native_id": "9-669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.295230865478516, "incorrect_loss_raw": 13.563084920247396, "correct_loss_per_char": 1.4842485281137319, "incorrect_loss_per_char": 1.2047870505569327, "correct_loss_per_token": 6.431743621826172, "incorrect_loss_per_token": 6.761210123697917, "correct_loss_uncond": -0.07651519775390625, "incorrect_loss_uncond": -0.5843095779418945}, "model_output": [{"sum_logits": -10.080818176269531, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -10.917590141296387, "logits_per_token": -10.080818176269531, "logits_per_char": -1.6801363627115886, "num_chars": 6}, {"sum_logits": -10.93978500366211, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.646595001220703, "logits_per_char": -0.8415219233586237, "num_chars": 13}, {"sum_logits": -19.295230865478516, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.371746063232422, "logits_per_token": -6.431743621826172, "logits_per_char": -1.4842485281137319, "num_chars": 13}, {"sum_logits": -19.668651580810547, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.561748504638672, "logits_per_token": -6.556217193603516, "logits_per_char": -1.092702865600586, "num_chars": 18}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 403, "native_id": "7-732", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.120334625244141, "incorrect_loss_raw": 11.41327158610026, "correct_loss_per_char": 0.3413556416829427, "incorrect_loss_per_char": 0.7925252649519178, "correct_loss_per_token": 2.5601673126220703, "incorrect_loss_per_token": 4.298412534925672, "correct_loss_uncond": -14.073789596557617, "incorrect_loss_uncond": -11.75896962483724}, "model_output": [{"sum_logits": -5.120334625244141, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.194124221801758, "logits_per_token": -2.5601673126220703, "logits_per_char": -0.3413556416829427, "num_chars": 15}, {"sum_logits": -12.786029815673828, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -24.516721725463867, "logits_per_token": -4.262009938557942, "logits_per_char": -0.7991268634796143, "num_chars": 16}, {"sum_logits": -8.891796112060547, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -20.742313385009766, "logits_per_token": -4.445898056030273, "logits_per_char": -0.7409830093383789, "num_chars": 12}, {"sum_logits": -12.561988830566406, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -24.257688522338867, "logits_per_token": -4.187329610188802, "logits_per_char": -0.8374659220377604, "num_chars": 15}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 404, "native_id": "7-658", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.662158012390137, "incorrect_loss_raw": 6.103533109029134, "correct_loss_per_char": 0.6662158012390137, "incorrect_loss_per_char": 1.1706795674783212, "correct_loss_per_token": 6.662158012390137, "incorrect_loss_per_token": 3.798167732026842, "correct_loss_uncond": -7.36224365234375, "incorrect_loss_uncond": -8.418620109558105}, "model_output": [{"sum_logits": -6.927548885345459, "num_tokens": 2, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -14.704242706298828, "logits_per_token": -3.4637744426727295, "logits_per_char": -1.3855097770690918, "num_chars": 5}, {"sum_logits": -6.204567909240723, "num_tokens": 1, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -12.748580932617188, "logits_per_token": -6.204567909240723, "logits_per_char": -1.5511419773101807, "num_chars": 4}, {"sum_logits": -5.178482532501221, "num_tokens": 3, "num_tokens_all": 126, "is_greedy": false, "sum_logits_uncond": -16.113636016845703, "logits_per_token": -1.7261608441670735, "logits_per_char": -0.5753869480556912, "num_chars": 9}, {"sum_logits": -6.662158012390137, "num_tokens": 1, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -14.024401664733887, "logits_per_token": -6.662158012390137, "logits_per_char": -0.6662158012390137, "num_chars": 10}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 405, "native_id": "1003", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.09486198425293, "incorrect_loss_raw": 11.226511160532633, "correct_loss_per_char": 0.6929262753190666, "incorrect_loss_per_char": 1.178815170627996, "correct_loss_per_token": 6.69828732808431, "incorrect_loss_per_token": 6.298547347386678, "correct_loss_uncond": -6.353208541870117, "incorrect_loss_uncond": -5.222339789072673}, "model_output": [{"sum_logits": -5.978030681610107, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -14.336740493774414, "logits_per_token": -2.9890153408050537, "logits_per_char": -0.49816922346750897, "num_chars": 12}, {"sum_logits": -20.09486198425293, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -26.448070526123047, "logits_per_token": -6.69828732808431, "logits_per_char": -0.6929262753190666, "num_chars": 29}, {"sum_logits": -10.009188652038574, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -12.445183753967285, "logits_per_token": -10.009188652038574, "logits_per_char": -1.4298840931483678, "num_chars": 7}, {"sum_logits": -17.69231414794922, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -22.56462860107422, "logits_per_token": -5.897438049316406, "logits_per_char": -1.6083921952681108, "num_chars": 11}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 406, "native_id": "8-62", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.059810638427734, "incorrect_loss_raw": 13.031717618306478, "correct_loss_per_char": 0.6695147923060826, "incorrect_loss_per_char": 0.7077040813587331, "correct_loss_per_token": 4.686603546142578, "incorrect_loss_per_token": 4.343905872768826, "correct_loss_uncond": -11.205678939819336, "incorrect_loss_uncond": -11.703139305114746}, "model_output": [{"sum_logits": -14.059810638427734, "num_tokens": 3, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -25.26548957824707, "logits_per_token": -4.686603546142578, "logits_per_char": -0.6695147923060826, "num_chars": 21}, {"sum_logits": -17.35124969482422, "num_tokens": 3, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -24.643417358398438, "logits_per_token": -5.783749898274739, "logits_per_char": -0.9639583163791232, "num_chars": 18}, {"sum_logits": -12.9525785446167, "num_tokens": 3, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -25.690982818603516, "logits_per_token": -4.317526181538899, "logits_per_char": -0.7195876969231499, "num_chars": 18}, {"sum_logits": -8.791324615478516, "num_tokens": 3, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -23.87017059326172, "logits_per_token": -2.9304415384928384, "logits_per_char": -0.43956623077392576, "num_chars": 20}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 407, "native_id": "7-386", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.367776870727539, "incorrect_loss_raw": 8.411949475606283, "correct_loss_per_char": 0.5367776870727539, "incorrect_loss_per_char": 1.1271044678158229, "correct_loss_per_token": 5.367776870727539, "incorrect_loss_per_token": 8.411949475606283, "correct_loss_uncond": -8.541312217712402, "incorrect_loss_uncond": -3.4541266759236655}, "model_output": [{"sum_logits": -7.587949752807617, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.896349906921387, "logits_per_token": -7.587949752807617, "logits_per_char": -1.2646582921346028, "num_chars": 6}, {"sum_logits": -5.367776870727539, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.909089088439941, "logits_per_token": -5.367776870727539, "logits_per_char": -0.5367776870727539, "num_chars": 10}, {"sum_logits": -9.895936012268066, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.4952392578125, "logits_per_token": -9.895936012268066, "logits_per_char": -0.8246613343556722, "num_chars": 12}, {"sum_logits": -7.751962661743164, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.206639289855957, "logits_per_token": -7.751962661743164, "logits_per_char": -1.291993776957194, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 408, "native_id": "257", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.337188482284546, "incorrect_loss_raw": 7.063081423441569, "correct_loss_per_char": 0.5842971205711365, "incorrect_loss_per_char": 0.5767277378950735, "correct_loss_per_token": 2.337188482284546, "incorrect_loss_per_token": 3.5315407117207847, "correct_loss_uncond": -9.34023404121399, "incorrect_loss_uncond": -8.555071194966635}, "model_output": [{"sum_logits": -9.850817680358887, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.900577545166016, "logits_per_token": -4.925408840179443, "logits_per_char": -0.8955288800326261, "num_chars": 11}, {"sum_logits": -7.679024696350098, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.700294494628906, "logits_per_token": -3.839512348175049, "logits_per_char": -0.590694207411546, "num_chars": 13}, {"sum_logits": -3.6594018936157227, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.253585815429688, "logits_per_token": -1.8297009468078613, "logits_per_char": -0.24396012624104818, "num_chars": 15}, {"sum_logits": -2.337188482284546, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.677422523498535, "logits_per_token": -2.337188482284546, "logits_per_char": -0.5842971205711365, "num_chars": 4}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 409, "native_id": "147", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.446192741394043, "incorrect_loss_raw": 3.218242327372233, "correct_loss_per_char": 0.45384939511617023, "incorrect_loss_per_char": 0.5164490867544104, "correct_loss_per_token": 5.446192741394043, "incorrect_loss_per_token": 3.218242327372233, "correct_loss_uncond": -7.094365119934082, "incorrect_loss_uncond": -9.94576358795166}, "model_output": [{"sum_logits": -3.2408053874969482, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.917444229125977, "logits_per_token": -3.2408053874969482, "logits_per_char": -0.3600894874996609, "num_chars": 9}, {"sum_logits": -2.8057963848114014, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.581989288330078, "logits_per_token": -2.8057963848114014, "logits_per_char": -0.4676327308019002, "num_chars": 6}, {"sum_logits": -3.6081252098083496, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -11.992584228515625, "logits_per_token": -3.6081252098083496, "logits_per_char": -0.7216250419616699, "num_chars": 5}, {"sum_logits": -5.446192741394043, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -12.540557861328125, "logits_per_token": -5.446192741394043, "logits_per_char": -0.45384939511617023, "num_chars": 12}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 410, "native_id": "7-599", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.470169544219971, "incorrect_loss_raw": 6.308595975240071, "correct_loss_per_char": 1.094033908843994, "incorrect_loss_per_char": 0.887609146259449, "correct_loss_per_token": 5.470169544219971, "incorrect_loss_per_token": 3.9504649904039173, "correct_loss_uncond": -7.332279682159424, "incorrect_loss_uncond": -8.164274533589682}, "model_output": [{"sum_logits": -2.941865921020508, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.073330879211426, "logits_per_token": -2.941865921020508, "logits_per_char": -0.5883731842041016, "num_chars": 5}, {"sum_logits": -5.372332572937012, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.399619102478027, "logits_per_token": -5.372332572937012, "logits_per_char": -0.8953887621561686, "num_chars": 6}, {"sum_logits": -10.611589431762695, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.945661544799805, "logits_per_token": -3.537196477254232, "logits_per_char": -1.1790654924180772, "num_chars": 9}, {"sum_logits": -5.470169544219971, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.802449226379395, "logits_per_token": -5.470169544219971, "logits_per_char": -1.094033908843994, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 411, "native_id": "8-92", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.892545700073242, "incorrect_loss_raw": 17.45837688446045, "correct_loss_per_char": 0.9957018280029297, "incorrect_loss_per_char": 0.6770952562685096, "correct_loss_per_token": 4.148757616678874, "incorrect_loss_per_token": 2.9461836285061307, "correct_loss_uncond": -2.791471481323242, "incorrect_loss_uncond": -3.289348284403483}, "model_output": [{"sum_logits": -16.175296783447266, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.45023536682129, "logits_per_token": -2.3107566833496094, "logits_per_char": -0.6221267993633564, "num_chars": 26}, {"sum_logits": -24.892545700073242, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -27.684017181396484, "logits_per_token": -4.148757616678874, "logits_per_char": -0.9957018280029297, "num_chars": 25}, {"sum_logits": -14.834656715393066, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -17.869892120361328, "logits_per_token": -2.9669313430786133, "logits_per_char": -0.7807714060733193, "num_chars": 19}, {"sum_logits": -21.365177154541016, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -24.92304801940918, "logits_per_token": -3.5608628590901694, "logits_per_char": -0.6283875633688534, "num_chars": 34}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 412, "native_id": "354", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.948695182800293, "incorrect_loss_raw": 9.073251406351725, "correct_loss_per_char": 1.706956454685756, "incorrect_loss_per_char": 1.0635571691724988, "correct_loss_per_token": 3.982898394266764, "incorrect_loss_per_token": 5.171280860900879, "correct_loss_uncond": -4.357064247131348, "incorrect_loss_uncond": -5.546873410542806}, "model_output": [{"sum_logits": -11.948695182800293, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -16.30575942993164, "logits_per_token": -3.982898394266764, "logits_per_char": -1.706956454685756, "num_chars": 7}, {"sum_logits": -10.521781921386719, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -15.097009658813477, "logits_per_token": -3.5072606404622397, "logits_per_char": -0.8768151601155599, "num_chars": 12}, {"sum_logits": -7.03708553314209, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.379186630249023, "logits_per_token": -2.34569517771403, "logits_per_char": -0.703708553314209, "num_chars": 10}, {"sum_logits": -9.660886764526367, "num_tokens": 1, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -15.384178161621094, "logits_per_token": -9.660886764526367, "logits_per_char": -1.6101477940877278, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 413, "native_id": "9-966", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.2386884689331055, "incorrect_loss_raw": 13.180951277414957, "correct_loss_per_char": 0.8042987187703451, "incorrect_loss_per_char": 1.9060557065186678, "correct_loss_per_token": 3.6193442344665527, "incorrect_loss_per_token": 6.545227103763157, "correct_loss_uncond": -11.130040168762207, "incorrect_loss_uncond": -3.2621779441833496}, "model_output": [{"sum_logits": -18.87894058227539, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.286333084106445, "logits_per_token": -9.439470291137695, "logits_per_char": -3.1464900970458984, "num_chars": 6}, {"sum_logits": -7.2386884689331055, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -18.368728637695312, "logits_per_token": -3.6193442344665527, "logits_per_char": -0.8042987187703451, "num_chars": 9}, {"sum_logits": -15.701553344726562, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.154062271118164, "logits_per_token": -5.2338511149088545, "logits_per_char": -1.7446170383029513, "num_chars": 9}, {"sum_logits": -4.96235990524292, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.96235990524292, "logits_per_char": -0.8270599842071533, "num_chars": 6}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 414, "native_id": "9-612", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.350672721862793, "incorrect_loss_raw": 6.7835283279418945, "correct_loss_per_char": 0.5350672721862793, "incorrect_loss_per_char": 1.1757879216446836, "correct_loss_per_token": 5.350672721862793, "incorrect_loss_per_token": 6.7835283279418945, "correct_loss_uncond": -7.956539154052734, "incorrect_loss_uncond": -5.324678103129069}, "model_output": [{"sum_logits": -3.9968738555908203, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -3.9968738555908203, "logits_per_char": -0.6661456425984701, "num_chars": 6}, {"sum_logits": -5.350672721862793, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -13.307211875915527, "logits_per_token": -5.350672721862793, "logits_per_char": -0.5350672721862793, "num_chars": 10}, {"sum_logits": -7.090544700622559, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -14.34075927734375, "logits_per_token": -7.090544700622559, "logits_per_char": -0.5454265154325045, "num_chars": 13}, {"sum_logits": -9.263166427612305, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -12.094867706298828, "logits_per_token": -9.263166427612305, "logits_per_char": -2.315791606903076, "num_chars": 4}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 415, "native_id": "9-548", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.25256061553955, "incorrect_loss_raw": 11.388678550720215, "correct_loss_per_char": 1.0280622906155057, "incorrect_loss_per_char": 0.9916643240512947, "correct_loss_per_token": 9.25256061553955, "incorrect_loss_per_token": 6.050662676493327, "correct_loss_uncond": -2.10817813873291, "incorrect_loss_uncond": -2.041762351989746}, "model_output": [{"sum_logits": -9.25256061553955, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.360738754272461, "logits_per_token": -9.25256061553955, "logits_per_char": -1.0280622906155057, "num_chars": 9}, {"sum_logits": -10.144964218139648, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.796646118164062, "logits_per_token": -10.144964218139648, "logits_per_char": -1.127218246459961, "num_chars": 9}, {"sum_logits": -11.895063400268555, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.9650211334228516, "logits_per_char": -0.915004876943735, "num_chars": 13}, {"sum_logits": -12.126008033752441, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -16.531831741333008, "logits_per_token": -4.0420026779174805, "logits_per_char": -0.9327698487501878, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 416, "native_id": "9-429", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.963930130004883, "incorrect_loss_raw": 12.864819844563803, "correct_loss_per_char": 0.7117092950003487, "incorrect_loss_per_char": 0.9241294297252091, "correct_loss_per_token": 2.4909825325012207, "incorrect_loss_per_token": 3.2162049611409507, "correct_loss_uncond": -10.219423294067383, "incorrect_loss_uncond": -6.714669545491536}, "model_output": [{"sum_logits": -9.963930130004883, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -20.183353424072266, "logits_per_token": -2.4909825325012207, "logits_per_char": -0.7117092950003487, "num_chars": 14}, {"sum_logits": -16.134048461914062, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -23.20090675354004, "logits_per_token": -4.033512115478516, "logits_per_char": -1.1524320329938615, "num_chars": 14}, {"sum_logits": -10.50734806060791, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -16.444780349731445, "logits_per_token": -2.6268370151519775, "logits_per_char": -0.700489870707194, "num_chars": 15}, {"sum_logits": -11.953063011169434, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.09278106689453, "logits_per_token": -2.9882657527923584, "logits_per_char": -0.9194663854745718, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 417, "native_id": "7-95", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.057161331176758, "incorrect_loss_raw": 16.17845280965169, "correct_loss_per_char": 0.5206356048583984, "incorrect_loss_per_char": 0.5695487501064024, "correct_loss_per_token": 3.5142903327941895, "incorrect_loss_per_token": 3.067165798611111, "correct_loss_uncond": -15.03097915649414, "incorrect_loss_uncond": -10.707595825195312}, "model_output": [{"sum_logits": -15.167228698730469, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -25.149396896362305, "logits_per_token": -2.5278714497884116, "logits_per_char": -0.5230078861631197, "num_chars": 29}, {"sum_logits": -11.005105972290039, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -23.077306747436523, "logits_per_token": -2.201021194458008, "logits_per_char": -0.44020423889160154, "num_chars": 25}, {"sum_logits": -14.057161331176758, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -29.0881404876709, "logits_per_token": -3.5142903327941895, "logits_per_char": -0.5206356048583984, "num_chars": 27}, {"sum_logits": -22.36302375793457, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -32.43144226074219, "logits_per_token": -4.472604751586914, "logits_per_char": -0.7454341252644857, "num_chars": 30}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 418, "native_id": "1560", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 31.534046173095703, "incorrect_loss_raw": 20.06795597076416, "correct_loss_per_char": 1.2613618469238281, "incorrect_loss_per_char": 0.8546810355050666, "correct_loss_per_token": 6.3068092346191404, "incorrect_loss_per_token": 3.462149052392869, "correct_loss_uncond": -6.29278564453125, "incorrect_loss_uncond": -8.484513282775879}, "model_output": [{"sum_logits": -31.534046173095703, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -37.82683181762695, "logits_per_token": -6.3068092346191404, "logits_per_char": -1.2613618469238281, "num_chars": 25}, {"sum_logits": -14.244709968566895, "num_tokens": 6, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -25.312694549560547, "logits_per_token": -2.3741183280944824, "logits_per_char": -0.6193352160246476, "num_chars": 23}, {"sum_logits": -25.317859649658203, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -30.813764572143555, "logits_per_token": -5.063571929931641, "logits_per_char": -1.1508118022571912, "num_chars": 22}, {"sum_logits": -20.641298294067383, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -29.530948638916016, "logits_per_token": -2.948756899152483, "logits_per_char": -0.7938960882333609, "num_chars": 26}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 419, "native_id": "9-461", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.504819869995117, "incorrect_loss_raw": 24.251780192057293, "correct_loss_per_char": 0.7501417608822093, "incorrect_loss_per_char": 0.6907394401028625, "correct_loss_per_token": 3.643545695713588, "incorrect_loss_per_token": 3.3638421194893975, "correct_loss_uncond": -13.435644149780273, "incorrect_loss_uncond": -12.21172841389974}, "model_output": [{"sum_logits": -28.337345123291016, "num_tokens": 7, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -30.142641067504883, "logits_per_token": -4.048192160470145, "logits_per_char": -0.8587074279785156, "num_chars": 33}, {"sum_logits": -28.40264892578125, "num_tokens": 10, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -51.198463439941406, "logits_per_token": -2.840264892578125, "logits_per_char": -0.5462047870342548, "num_chars": 52}, {"sum_logits": -25.504819869995117, "num_tokens": 7, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -38.94046401977539, "logits_per_token": -3.643545695713588, "logits_per_char": -0.7501417608822093, "num_chars": 34}, {"sum_logits": -16.01534652709961, "num_tokens": 5, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -28.049421310424805, "logits_per_token": -3.203069305419922, "logits_per_char": -0.667306105295817, "num_chars": 24}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 420, "native_id": "9-490", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.677236557006836, "incorrect_loss_raw": 19.204434076944988, "correct_loss_per_char": 2.061566959727894, "incorrect_loss_per_char": 1.231060330829923, "correct_loss_per_token": 11.338618278503418, "incorrect_loss_per_token": 8.617422315809462, "correct_loss_uncond": -0.018407821655273438, "incorrect_loss_uncond": -3.283137639363607}, "model_output": [{"sum_logits": -23.334047317504883, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -25.118450164794922, "logits_per_token": -11.667023658752441, "logits_per_char": -1.6667176655360632, "num_chars": 14}, {"sum_logits": -16.552949905395508, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -20.298852920532227, "logits_per_token": -8.276474952697754, "logits_per_char": -1.1823535646711076, "num_chars": 14}, {"sum_logits": -22.677236557006836, "num_tokens": 2, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -22.69564437866211, "logits_per_token": -11.338618278503418, "logits_per_char": -2.061566959727894, "num_chars": 11}, {"sum_logits": -17.72630500793457, "num_tokens": 3, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -22.045412063598633, "logits_per_token": -5.90876833597819, "logits_per_char": -0.8441097622825986, "num_chars": 21}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 421, "native_id": "9-301", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.950078964233398, "incorrect_loss_raw": 6.593791166941325, "correct_loss_per_char": 1.2375197410583496, "incorrect_loss_per_char": 1.0679216844064219, "correct_loss_per_token": 4.950078964233398, "incorrect_loss_per_token": 6.593791166941325, "correct_loss_uncond": -7.14478874206543, "incorrect_loss_uncond": -6.283803145090739}, "model_output": [{"sum_logits": -5.7716755867004395, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -11.676148414611816, "logits_per_token": -5.7716755867004395, "logits_per_char": -1.154335117340088, "num_chars": 5}, {"sum_logits": -8.465733528137207, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -14.02598762512207, "logits_per_token": -8.465733528137207, "logits_per_char": -0.9406370586819119, "num_chars": 9}, {"sum_logits": -4.950078964233398, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.094867706298828, "logits_per_token": -4.950078964233398, "logits_per_char": -1.2375197410583496, "num_chars": 4}, {"sum_logits": -5.543964385986328, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -12.930646896362305, "logits_per_token": -5.543964385986328, "logits_per_char": -1.1087928771972657, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 422, "native_id": "60", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.3771791458129883, "incorrect_loss_raw": 4.527498881022136, "correct_loss_per_char": 0.47543582916259763, "incorrect_loss_per_char": 0.5078711028050894, "correct_loss_per_token": 2.3771791458129883, "incorrect_loss_per_token": 2.974250316619873, "correct_loss_uncond": -9.03602409362793, "incorrect_loss_uncond": -9.435848871866861}, "model_output": [{"sum_logits": -4.263005256652832, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.255573272705078, "logits_per_token": -4.263005256652832, "logits_per_char": -0.7105008761088053, "num_chars": 6}, {"sum_logits": -4.503055572509766, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -14.244253158569336, "logits_per_token": -2.251527786254883, "logits_per_char": -0.37525463104248047, "num_chars": 12}, {"sum_logits": -2.3771791458129883, "num_tokens": 1, "num_tokens_all": 115, "is_greedy": true, "sum_logits_uncond": -11.413203239440918, "logits_per_token": -2.3771791458129883, "logits_per_char": -0.47543582916259763, "num_chars": 5}, {"sum_logits": -4.816435813903809, "num_tokens": 2, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -15.390216827392578, "logits_per_token": -2.4082179069519043, "logits_per_char": -0.4378578012639826, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 423, "native_id": "9-894", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.14361000061035, "incorrect_loss_raw": 11.172714392344156, "correct_loss_per_char": 0.9022952631900185, "incorrect_loss_per_char": 1.2179695620681301, "correct_loss_per_token": 5.714536666870117, "incorrect_loss_per_token": 5.586357196172078, "correct_loss_uncond": -8.03907585144043, "incorrect_loss_uncond": -3.4587508837381997}, "model_output": [{"sum_logits": -17.799606323242188, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.782465934753418, "logits_per_token": -8.899803161621094, "logits_per_char": -2.2249507904052734, "num_chars": 8}, {"sum_logits": -6.3228068351745605, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.577720642089844, "logits_per_token": -3.1614034175872803, "logits_per_char": -0.5748006213795055, "num_chars": 11}, {"sum_logits": -17.14361000061035, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -25.18268585205078, "logits_per_token": -5.714536666870117, "logits_per_char": -0.9022952631900185, "num_chars": 19}, {"sum_logits": -9.395730018615723, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.534209251403809, "logits_per_token": -4.697865009307861, "logits_per_char": -0.8541572744196112, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 424, "native_id": "9-895", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.169227600097656, "incorrect_loss_raw": 21.87718137105306, "correct_loss_per_char": 0.8389742533365886, "incorrect_loss_per_char": 0.5936385992878206, "correct_loss_per_token": 4.194871266682942, "incorrect_loss_per_token": 3.1770229248773485, "correct_loss_uncond": -5.4720458984375, "incorrect_loss_uncond": -13.064743677775065}, "model_output": [{"sum_logits": -25.169227600097656, "num_tokens": 6, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -30.641273498535156, "logits_per_token": -4.194871266682942, "logits_per_char": -0.8389742533365886, "num_chars": 30}, {"sum_logits": -13.66569709777832, "num_tokens": 7, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -20.483421325683594, "logits_per_token": -1.95224244253976, "logits_per_char": -0.471230934406149, "num_chars": 29}, {"sum_logits": -23.822416305541992, "num_tokens": 5, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -37.73855209350586, "logits_per_token": -4.764483261108398, "logits_per_char": -0.7684650421142578, "num_chars": 31}, {"sum_logits": -28.143430709838867, "num_tokens": 10, "num_tokens_all": 127, "is_greedy": false, "sum_logits_uncond": -46.60380172729492, "logits_per_token": -2.814343070983887, "logits_per_char": -0.5412198213430551, "num_chars": 52}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 425, "native_id": "9-281", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.115379333496094, "incorrect_loss_raw": 17.188055356343586, "correct_loss_per_char": 0.5143111472906068, "incorrect_loss_per_char": 0.8015758901644459, "correct_loss_per_token": 2.457264370388455, "incorrect_loss_per_token": 3.282216223459395, "correct_loss_uncond": -18.501548767089844, "incorrect_loss_uncond": -9.118959108988443}, "model_output": [{"sum_logits": -22.115379333496094, "num_tokens": 9, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -40.61692810058594, "logits_per_token": -2.457264370388455, "logits_per_char": -0.5143111472906068, "num_chars": 43}, {"sum_logits": -18.479310989379883, "num_tokens": 7, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -33.161258697509766, "logits_per_token": -2.639901569911412, "logits_per_char": -0.659975392477853, "num_chars": 28}, {"sum_logits": -13.021780967712402, "num_tokens": 3, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -4.340593655904134, "logits_per_char": -1.0016754590548003, "num_chars": 13}, {"sum_logits": -20.063074111938477, "num_tokens": 7, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -33.796939849853516, "logits_per_token": -2.8661534445626393, "logits_per_char": -0.7430768189606843, "num_chars": 27}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 426, "native_id": "202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.328516483306885, "incorrect_loss_raw": 4.212932745615642, "correct_loss_per_char": 0.484410589391535, "incorrect_loss_per_char": 0.6018475350879487, "correct_loss_per_token": 1.776172161102295, "incorrect_loss_per_token": 2.106466372807821, "correct_loss_uncond": -10.19263219833374, "incorrect_loss_uncond": -10.338188012441}, "model_output": [{"sum_logits": -2.6398186683654785, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": true, "sum_logits_uncond": -13.204900741577148, "logits_per_token": -1.3199093341827393, "logits_per_char": -0.3771169526236398, "num_chars": 7}, {"sum_logits": -4.383758068084717, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -14.314716339111328, "logits_per_token": -2.1918790340423584, "logits_per_char": -0.6262511525835309, "num_chars": 7}, {"sum_logits": -5.328516483306885, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.521148681640625, "logits_per_token": -1.776172161102295, "logits_per_char": -0.484410589391535, "num_chars": 11}, {"sum_logits": -5.6152215003967285, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -16.133745193481445, "logits_per_token": -2.8076107501983643, "logits_per_char": -0.8021745000566755, "num_chars": 7}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 427, "native_id": "1937", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.448793411254883, "incorrect_loss_raw": 10.841425895690918, "correct_loss_per_char": 0.5804885228474935, "incorrect_loss_per_char": 0.7223553723759122, "correct_loss_per_token": 5.224396705627441, "incorrect_loss_per_token": 5.420712947845459, "correct_loss_uncond": -6.360250473022461, "incorrect_loss_uncond": -5.307496388753255}, "model_output": [{"sum_logits": -11.864290237426758, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -15.088715553283691, "logits_per_token": -5.932145118713379, "logits_per_char": -0.6591272354125977, "num_chars": 18}, {"sum_logits": -10.258883476257324, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -17.30459213256836, "logits_per_token": -5.129441738128662, "logits_per_char": -0.6411802172660828, "num_chars": 16}, {"sum_logits": -10.448793411254883, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -16.809043884277344, "logits_per_token": -5.224396705627441, "logits_per_char": -0.5804885228474935, "num_chars": 18}, {"sum_logits": -10.401103973388672, "num_tokens": 2, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -16.05345916748047, "logits_per_token": -5.200551986694336, "logits_per_char": -0.866758664449056, "num_chars": 12}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 428, "native_id": "620", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 18.85127067565918, "incorrect_loss_raw": 17.65267054239909, "correct_loss_per_char": 0.8196204641590947, "incorrect_loss_per_char": 1.125798852459278, "correct_loss_per_token": 9.42563533782959, "incorrect_loss_per_token": 7.451349099477132, "correct_loss_uncond": -5.406835556030273, "incorrect_loss_uncond": -5.322598775227864}, "model_output": [{"sum_logits": -18.85127067565918, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -24.258106231689453, "logits_per_token": -9.42563533782959, "logits_per_char": -0.8196204641590947, "num_chars": 23}, {"sum_logits": -16.93313980102539, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.13713264465332, "logits_per_token": -8.466569900512695, "logits_per_char": -1.128875986735026, "num_chars": 15}, {"sum_logits": -16.499834060668945, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -25.252328872680664, "logits_per_token": -4.124958515167236, "logits_per_char": -1.0999889373779297, "num_chars": 15}, {"sum_logits": -19.52503776550293, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -22.536346435546875, "logits_per_token": -9.762518882751465, "logits_per_char": -1.1485316332648783, "num_chars": 17}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 429, "native_id": "8-142", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.90188980102539, "incorrect_loss_raw": 17.343372344970703, "correct_loss_per_char": 0.60442999134893, "incorrect_loss_per_char": 0.7224071521114036, "correct_loss_per_token": 2.780377960205078, "incorrect_loss_per_token": 3.668761889139811, "correct_loss_uncond": -14.043144226074219, "incorrect_loss_uncond": -12.15646489461263}, "model_output": [{"sum_logits": -14.742158889770508, "num_tokens": 6, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -27.338417053222656, "logits_per_token": -2.457026481628418, "logits_per_char": -0.5460058848063151, "num_chars": 27}, {"sum_logits": -15.454607009887695, "num_tokens": 5, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -27.9930419921875, "logits_per_token": -3.090921401977539, "logits_per_char": -0.6719394352125085, "num_chars": 23}, {"sum_logits": -13.90188980102539, "num_tokens": 5, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -27.94503402709961, "logits_per_token": -2.780377960205078, "logits_per_char": -0.60442999134893, "num_chars": 23}, {"sum_logits": -21.833351135253906, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -33.168052673339844, "logits_per_token": -5.458337783813477, "logits_per_char": -0.9492761363153872, "num_chars": 23}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 430, "native_id": "7-1138", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.850200653076172, "incorrect_loss_raw": 8.733151912689209, "correct_loss_per_char": 0.40418338775634766, "incorrect_loss_per_char": 0.659837618852273, "correct_loss_per_token": 2.425100326538086, "incorrect_loss_per_token": 4.3665759563446045, "correct_loss_uncond": -13.061456680297852, "incorrect_loss_uncond": -12.398488203684488}, "model_output": [{"sum_logits": -4.850200653076172, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.911657333374023, "logits_per_token": -2.425100326538086, "logits_per_char": -0.40418338775634766, "num_chars": 12}, {"sum_logits": -13.896846771240234, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -24.476028442382812, "logits_per_token": -6.948423385620117, "logits_per_char": -0.9926319122314453, "num_chars": 14}, {"sum_logits": -6.32211971282959, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -21.208402633666992, "logits_per_token": -3.161059856414795, "logits_per_char": -0.5268433094024658, "num_chars": 12}, {"sum_logits": -5.980489253997803, "num_tokens": 2, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.71048927307129, "logits_per_token": -2.9902446269989014, "logits_per_char": -0.4600376349229079, "num_chars": 13}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 431, "native_id": "8-471", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.8850154876709, "incorrect_loss_raw": 16.159574190775555, "correct_loss_per_char": 1.2060725348336356, "incorrect_loss_per_char": 0.8643927254238261, "correct_loss_per_token": 5.6283384958903, "incorrect_loss_per_token": 4.269480387369792, "correct_loss_uncond": -8.77218246459961, "incorrect_loss_uncond": -7.668875694274902}, "model_output": [{"sum_logits": -13.25062084197998, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -19.44281578063965, "logits_per_token": -4.416873613993327, "logits_per_char": -0.8833747227986654, "num_chars": 15}, {"sum_logits": -16.8850154876709, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -25.657197952270508, "logits_per_token": -5.6283384958903, "logits_per_char": -1.2060725348336356, "num_chars": 14}, {"sum_logits": -20.10679817199707, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -27.351722717285156, "logits_per_token": -3.3511330286661782, "logits_per_char": -0.9139453714544122, "num_chars": 22}, {"sum_logits": -15.12130355834961, "num_tokens": 3, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -24.690811157226562, "logits_per_token": -5.04043451944987, "logits_per_char": -0.7958580820184005, "num_chars": 19}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 432, "native_id": "9-433", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.668076515197754, "incorrect_loss_raw": 4.843150774637858, "correct_loss_per_char": 1.2226921717325847, "incorrect_loss_per_char": 1.6143835915459528, "correct_loss_per_token": 3.668076515197754, "incorrect_loss_per_token": 4.843150774637858, "correct_loss_uncond": -3.5500264167785645, "incorrect_loss_uncond": -2.9286484718322754}, "model_output": [{"sum_logits": -4.747279167175293, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -7.790660381317139, "logits_per_token": -4.747279167175293, "logits_per_char": -1.582426389058431, "num_chars": 3}, {"sum_logits": -3.668076515197754, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -7.218102931976318, "logits_per_token": -3.668076515197754, "logits_per_char": -1.2226921717325847, "num_chars": 3}, {"sum_logits": -5.012920379638672, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -7.781537055969238, "logits_per_token": -5.012920379638672, "logits_per_char": -1.6709734598795574, "num_chars": 3}, {"sum_logits": -4.769252777099609, "num_tokens": 1, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -7.743200302124023, "logits_per_token": -4.769252777099609, "logits_per_char": -1.5897509256998699, "num_chars": 3}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 433, "native_id": "1458", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.664822578430176, "incorrect_loss_raw": 11.668264071146647, "correct_loss_per_char": 0.6903444698878697, "incorrect_loss_per_char": 1.296473785682961, "correct_loss_per_token": 3.221607526143392, "incorrect_loss_per_token": 5.834132035573323, "correct_loss_uncond": -8.5847806930542, "incorrect_loss_uncond": -5.347899754842122}, "model_output": [{"sum_logits": -10.696920394897461, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.947681427001953, "logits_per_token": -5.3484601974487305, "logits_per_char": -1.1885467105441623, "num_chars": 9}, {"sum_logits": -12.538045883178711, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.16919708251953, "logits_per_token": -6.2690229415893555, "logits_per_char": -1.393116209242079, "num_chars": 9}, {"sum_logits": -9.664822578430176, "num_tokens": 3, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -18.249603271484375, "logits_per_token": -3.221607526143392, "logits_per_char": -0.6903444698878697, "num_chars": 14}, {"sum_logits": -11.76982593536377, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.931612968444824, "logits_per_token": -5.884912967681885, "logits_per_char": -1.307758437262641, "num_chars": 9}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 434, "native_id": "57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.802509784698486, "incorrect_loss_raw": 5.95886246363322, "correct_loss_per_char": 0.9670849641164144, "incorrect_loss_per_char": 0.9877367907100254, "correct_loss_per_token": 2.901254892349243, "incorrect_loss_per_token": 4.162896394729614, "correct_loss_uncond": -8.840443134307861, "incorrect_loss_uncond": -7.732573986053467}, "model_output": [{"sum_logits": -4.705862998962402, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.297945976257324, "logits_per_token": -2.352931499481201, "logits_per_char": -0.784310499827067, "num_chars": 6}, {"sum_logits": -5.802509784698486, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.642952919006348, "logits_per_token": -2.901254892349243, "logits_per_char": -0.9670849641164144, "num_chars": 6}, {"sum_logits": -7.100790977478027, "num_tokens": 1, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.954693794250488, "logits_per_token": -7.100790977478027, "logits_per_char": -1.4201581954956055, "num_chars": 5}, {"sum_logits": -6.0699334144592285, "num_tokens": 2, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.821669578552246, "logits_per_token": -3.0349667072296143, "logits_per_char": -0.7587416768074036, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 435, "native_id": "605", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.070114135742188, "incorrect_loss_raw": 12.172212918599447, "correct_loss_per_char": 0.9070114135742188, "incorrect_loss_per_char": 1.1875816345214842, "correct_loss_per_token": 9.070114135742188, "incorrect_loss_per_token": 6.230979177686904, "correct_loss_uncond": -6.738315582275391, "incorrect_loss_uncond": -5.378546714782715}, "model_output": [{"sum_logits": -14.045459747314453, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.982410430908203, "logits_per_token": -4.681819915771484, "logits_per_char": -1.4045459747314453, "num_chars": 10}, {"sum_logits": -12.690092086791992, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.034587860107422, "logits_per_token": -4.230030695597331, "logits_per_char": -1.2690092086791993, "num_chars": 10}, {"sum_logits": -9.070114135742188, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -15.808429718017578, "logits_per_token": -9.070114135742188, "logits_per_char": -0.9070114135742188, "num_chars": 10}, {"sum_logits": -9.781086921691895, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -16.63528060913086, "logits_per_token": -9.781086921691895, "logits_per_char": -0.8891897201538086, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 436, "native_id": "9-889", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.845184326171875, "incorrect_loss_raw": 5.852357069651286, "correct_loss_per_char": 1.6408640543619792, "incorrect_loss_per_char": 0.9188547840824834, "correct_loss_per_token": 4.9225921630859375, "incorrect_loss_per_token": 5.017602761586507, "correct_loss_uncond": -6.847278594970703, "incorrect_loss_uncond": -6.226560115814209}, "model_output": [{"sum_logits": -9.845184326171875, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -16.692462921142578, "logits_per_token": -4.9225921630859375, "logits_per_char": -1.6408640543619792, "num_chars": 6}, {"sum_logits": -9.289427757263184, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.136067390441895, "logits_per_token": -9.289427757263184, "logits_per_char": -1.548237959543864, "num_chars": 6}, {"sum_logits": -3.259117603302002, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -10.193432807922363, "logits_per_token": -3.259117603302002, "logits_per_char": -0.6518235206604004, "num_chars": 5}, {"sum_logits": -5.008525848388672, "num_tokens": 2, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -13.907251358032227, "logits_per_token": -2.504262924194336, "logits_per_char": -0.5565028720431857, "num_chars": 9}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 437, "native_id": "1890", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.877262115478516, "incorrect_loss_raw": 26.99257532755534, "correct_loss_per_char": 0.6822074890136719, "incorrect_loss_per_char": 0.7782791882935315, "correct_loss_per_token": 3.979543685913086, "incorrect_loss_per_token": 4.210501322670589, "correct_loss_uncond": -20.289875030517578, "incorrect_loss_uncond": -12.841906229654947}, "model_output": [{"sum_logits": -23.877262115478516, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -44.167137145996094, "logits_per_token": -3.979543685913086, "logits_per_char": -0.6822074890136719, "num_chars": 35}, {"sum_logits": -26.800865173339844, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -42.113800048828125, "logits_per_token": -4.466810862223308, "logits_per_char": -0.8933621724446614, "num_chars": 30}, {"sum_logits": -17.855945587158203, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -33.54606246948242, "logits_per_token": -2.9759909311930337, "logits_per_char": -0.5101698739188057, "num_chars": 35}, {"sum_logits": -36.32091522216797, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -43.84358215332031, "logits_per_token": -5.1887021745954245, "logits_per_char": -0.9313055185171274, "num_chars": 39}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 438, "native_id": "9-618", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.978707313537598, "incorrect_loss_raw": 9.95160961151123, "correct_loss_per_char": 1.4964512189229329, "incorrect_loss_per_char": 1.8522734853956433, "correct_loss_per_token": 2.9929024378458657, "incorrect_loss_per_token": 9.95160961151123, "correct_loss_uncond": -5.921374320983887, "incorrect_loss_uncond": -3.1620308558146157}, "model_output": [{"sum_logits": -8.978707313537598, "num_tokens": 3, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -14.900081634521484, "logits_per_token": -2.9929024378458657, "logits_per_char": -1.4964512189229329, "num_chars": 6}, {"sum_logits": -10.12934684753418, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.200215339660645, "logits_per_token": -10.12934684753418, "logits_per_char": -2.025869369506836, "num_chars": 5}, {"sum_logits": -12.424359321594238, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.089152336120605, "logits_per_token": -12.424359321594238, "logits_per_char": -2.0707265535990396, "num_chars": 6}, {"sum_logits": -7.301122665405273, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -14.051553726196289, "logits_per_token": -7.301122665405273, "logits_per_char": -1.4602245330810546, "num_chars": 5}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 439, "native_id": "9-523", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.728514671325684, "incorrect_loss_raw": 11.992698987325033, "correct_loss_per_char": 1.5728514671325684, "incorrect_loss_per_char": 1.0942813598748409, "correct_loss_per_token": 5.242838223775228, "incorrect_loss_per_token": 8.20105563269721, "correct_loss_uncond": -1.0040102005004883, "incorrect_loss_uncond": -3.4424575169881186}, "model_output": [{"sum_logits": -15.728514671325684, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -16.732524871826172, "logits_per_token": -5.242838223775228, "logits_per_char": -1.5728514671325684, "num_chars": 10}, {"sum_logits": -9.59117603302002, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.062372207641602, "logits_per_token": -9.59117603302002, "logits_per_char": -0.799264669418335, "num_chars": 12}, {"sum_logits": -17.062395095825195, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -17.478199005126953, "logits_per_token": -5.6874650319417315, "logits_per_char": -1.5511268268931995, "num_chars": 11}, {"sum_logits": -9.324525833129883, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -14.764898300170898, "logits_per_token": -9.324525833129883, "logits_per_char": -0.9324525833129883, "num_chars": 10}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 440, "native_id": "1126", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.270248413085938, "incorrect_loss_raw": 26.43760363260905, "correct_loss_per_char": 0.796068133729877, "incorrect_loss_per_char": 0.6124965488714315, "correct_loss_per_token": 3.752892630440848, "incorrect_loss_per_token": 3.173743020920526, "correct_loss_uncond": -5.885948181152344, "incorrect_loss_uncond": -12.878413518269857}, "model_output": [{"sum_logits": -37.66260528564453, "num_tokens": 12, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -47.477210998535156, "logits_per_token": -3.1385504404703775, "logits_per_char": -0.7106151940687647, "num_chars": 53}, {"sum_logits": -18.171268463134766, "num_tokens": 6, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -34.421142578125, "logits_per_token": -3.0285447438557944, "logits_per_char": -0.5678521394729614, "num_chars": 32}, {"sum_logits": -26.270248413085938, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -32.15619659423828, "logits_per_token": -3.752892630440848, "logits_per_char": -0.796068133729877, "num_chars": 33}, {"sum_logits": -23.47893714904785, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -36.04969787597656, "logits_per_token": -3.3541338784354076, "logits_per_char": -0.5590223130725679, "num_chars": 42}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 441, "native_id": "644", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.686372756958008, "incorrect_loss_raw": 9.251002470652262, "correct_loss_per_char": 0.5372745513916015, "incorrect_loss_per_char": 1.0416449934545189, "correct_loss_per_token": 2.686372756958008, "incorrect_loss_per_token": 8.009224812189737, "correct_loss_uncond": -9.03702449798584, "incorrect_loss_uncond": -4.756229241689046}, "model_output": [{"sum_logits": -7.4506659507751465, "num_tokens": 2, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -16.407073974609375, "logits_per_token": -3.7253329753875732, "logits_per_char": -0.6208888292312622, "num_chars": 12}, {"sum_logits": -11.61174201965332, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -13.407981872558594, "logits_per_token": -11.61174201965332, "logits_per_char": -1.0556129108775745, "num_chars": 11}, {"sum_logits": -2.686372756958008, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -11.723397254943848, "logits_per_token": -2.686372756958008, "logits_per_char": -0.5372745513916015, "num_chars": 5}, {"sum_logits": -8.69059944152832, "num_tokens": 1, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -12.206639289855957, "logits_per_token": -8.69059944152832, "logits_per_char": -1.44843324025472, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 442, "native_id": "8-365", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8038333058357239, "incorrect_loss_raw": 6.792736053466797, "correct_loss_per_char": 0.10047916322946548, "incorrect_loss_per_char": 1.1912602795494927, "correct_loss_per_token": 0.8038333058357239, "incorrect_loss_per_token": 6.792736053466797, "correct_loss_uncond": -12.966298758983612, "incorrect_loss_uncond": -6.286952972412109}, "model_output": [{"sum_logits": -0.8038333058357239, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -13.770132064819336, "logits_per_token": -0.8038333058357239, "logits_per_char": -0.10047916322946548, "num_chars": 8}, {"sum_logits": -5.322384357452393, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.224363327026367, "logits_per_token": -5.322384357452393, "logits_per_char": -1.0644768714904784, "num_chars": 5}, {"sum_logits": -8.365925788879395, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.583134651184082, "logits_per_token": -8.365925788879395, "logits_per_char": -1.3943209648132324, "num_chars": 6}, {"sum_logits": -6.6898980140686035, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.43156909942627, "logits_per_token": -6.6898980140686035, "logits_per_char": -1.1149830023447673, "num_chars": 6}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 443, "native_id": "9-727", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.0493745803833, "incorrect_loss_raw": 8.473947842915853, "correct_loss_per_char": 2.0082290967305503, "incorrect_loss_per_char": 1.6839745044708252, "correct_loss_per_token": 12.0493745803833, "incorrect_loss_per_token": 8.473947842915853, "correct_loss_uncond": -2.1088571548461914, "incorrect_loss_uncond": -2.5654239654541016}, "model_output": [{"sum_logits": -7.818404197692871, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -7.818404197692871, "logits_per_char": -1.3030673662821453, "num_chars": 6}, {"sum_logits": -7.824044227600098, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -12.05653190612793, "logits_per_token": -7.824044227600098, "logits_per_char": -1.3040073712666829, "num_chars": 6}, {"sum_logits": -9.77939510345459, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -9.77939510345459, "logits_per_char": -2.4448487758636475, "num_chars": 4}, {"sum_logits": -12.0493745803833, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -14.158231735229492, "logits_per_token": -12.0493745803833, "logits_per_char": -2.0082290967305503, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 444, "native_id": "7-461", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.960892677307129, "incorrect_loss_raw": 6.233323891957601, "correct_loss_per_char": 1.3268154462178547, "incorrect_loss_per_char": 0.9154842967078799, "correct_loss_per_token": 7.960892677307129, "incorrect_loss_per_token": 5.11361034711202, "correct_loss_uncond": -6.020400047302246, "incorrect_loss_uncond": -7.426305611928304}, "model_output": [{"sum_logits": -6.718281269073486, "num_tokens": 2, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -14.388297080993652, "logits_per_token": -3.359140634536743, "logits_per_char": -0.7464756965637207, "num_chars": 9}, {"sum_logits": -5.045374870300293, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.276829719543457, "logits_per_token": -5.045374870300293, "logits_per_char": -1.0090749740600586, "num_chars": 5}, {"sum_logits": -6.936315536499023, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.313761711120605, "logits_per_token": -6.936315536499023, "logits_per_char": -0.9909022194998605, "num_chars": 7}, {"sum_logits": -7.960892677307129, "num_tokens": 1, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -13.981292724609375, "logits_per_token": -7.960892677307129, "logits_per_char": -1.3268154462178547, "num_chars": 6}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 445, "native_id": "9-1071", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 26.946300506591797, "incorrect_loss_raw": 21.009504318237305, "correct_loss_per_char": 0.5857891414476477, "incorrect_loss_per_char": 0.8693465064659242, "correct_loss_per_token": 2.9940333896213107, "incorrect_loss_per_token": 3.9335668881734214, "correct_loss_uncond": -12.21908187866211, "incorrect_loss_uncond": -11.407222747802734}, "model_output": [{"sum_logits": -15.551382064819336, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -27.908153533935547, "logits_per_token": -3.887845516204834, "logits_per_char": -0.7068810029463335, "num_chars": 22}, {"sum_logits": -26.946300506591797, "num_tokens": 9, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -39.165382385253906, "logits_per_token": -2.9940333896213107, "logits_per_char": -0.5857891414476477, "num_chars": 46}, {"sum_logits": -21.002716064453125, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -31.721269607543945, "logits_per_token": -3.500452677408854, "logits_per_char": -0.7500970023018974, "num_chars": 28}, {"sum_logits": -26.474414825439453, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -37.620758056640625, "logits_per_token": -4.412402470906575, "logits_per_char": -1.1510615141495415, "num_chars": 23}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 446, "native_id": "1918", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.583868026733398, "incorrect_loss_raw": 12.041041374206543, "correct_loss_per_char": 0.5992318107968285, "incorrect_loss_per_char": 0.5832214794115289, "correct_loss_per_token": 3.1459670066833496, "incorrect_loss_per_token": 3.5750226444668236, "correct_loss_uncond": -10.693449020385742, "incorrect_loss_uncond": -10.210593859354654}, "model_output": [{"sum_logits": -12.01701545715332, "num_tokens": 3, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -21.58344268798828, "logits_per_token": -4.0056718190511065, "logits_per_char": -0.5224789329197096, "num_chars": 23}, {"sum_logits": -12.583868026733398, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -23.27731704711914, "logits_per_token": -3.1459670066833496, "logits_per_char": -0.5992318107968285, "num_chars": 21}, {"sum_logits": -15.791681289672852, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -25.80904769897461, "logits_per_token": -3.947920322418213, "logits_per_char": -0.7895840644836426, "num_chars": 20}, {"sum_logits": -8.314427375793457, "num_tokens": 3, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -19.362415313720703, "logits_per_token": -2.7714757919311523, "logits_per_char": -0.4376014408312346, "num_chars": 19}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 447, "native_id": "1038", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.0982370376586914, "incorrect_loss_raw": 5.2323104540507, "correct_loss_per_char": 0.2997481482369559, "incorrect_loss_per_char": 1.0790631430489677, "correct_loss_per_token": 2.0982370376586914, "incorrect_loss_per_token": 5.2323104540507, "correct_loss_uncond": -9.783473014831543, "incorrect_loss_uncond": -8.449045022328695}, "model_output": [{"sum_logits": -6.166280746459961, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.029799461364746, "logits_per_token": -6.166280746459961, "logits_per_char": -1.5415701866149902, "num_chars": 4}, {"sum_logits": -3.6839404106140137, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -13.724745750427246, "logits_per_token": -3.6839404106140137, "logits_per_char": -0.5262772015162877, "num_chars": 7}, {"sum_logits": -5.846710205078125, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.289521217346191, "logits_per_token": -5.846710205078125, "logits_per_char": -1.169342041015625, "num_chars": 5}, {"sum_logits": -2.0982370376586914, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": true, "sum_logits_uncond": -11.881710052490234, "logits_per_token": -2.0982370376586914, "logits_per_char": -0.2997481482369559, "num_chars": 7}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 448, "native_id": "9-197", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.819766998291016, "incorrect_loss_raw": 6.667024930318196, "correct_loss_per_char": 1.704941749572754, "incorrect_loss_per_char": 1.4402270476023356, "correct_loss_per_token": 2.2732556660970054, "incorrect_loss_per_token": 6.667024930318196, "correct_loss_uncond": -3.3256263732910156, "incorrect_loss_uncond": -5.5516401926676435}, "model_output": [{"sum_logits": -5.871405601501465, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.43185043334961, "logits_per_token": -5.871405601501465, "logits_per_char": -1.174281120300293, "num_chars": 5}, {"sum_logits": -6.409323692321777, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.172591209411621, "logits_per_token": -6.409323692321777, "logits_per_char": -1.6023309230804443, "num_chars": 4}, {"sum_logits": -7.720345497131348, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.051553726196289, "logits_per_token": -7.720345497131348, "logits_per_char": -1.5440690994262696, "num_chars": 5}, {"sum_logits": -6.819766998291016, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -10.145393371582031, "logits_per_token": -2.2732556660970054, "logits_per_char": -1.704941749572754, "num_chars": 4}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 449, "native_id": "1393", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 20.33651351928711, "incorrect_loss_raw": 15.19573942820231, "correct_loss_per_char": 0.4960125248606612, "incorrect_loss_per_char": 0.5647008712780088, "correct_loss_per_token": 2.5420641899108887, "incorrect_loss_per_token": 2.6367232221545596, "correct_loss_uncond": -13.543136596679688, "incorrect_loss_uncond": -10.821590741475424}, "model_output": [{"sum_logits": -21.915990829467773, "num_tokens": 11, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -34.284942626953125, "logits_per_token": -1.9923628026788884, "logits_per_char": -0.44726511896873006, "num_chars": 49}, {"sum_logits": -20.33651351928711, "num_tokens": 8, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -33.8796501159668, "logits_per_token": -2.5420641899108887, "logits_per_char": -0.4960125248606612, "num_chars": 41}, {"sum_logits": -15.074159622192383, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -23.806941986083984, "logits_per_token": -3.7685399055480957, "logits_per_char": -0.8374533123440213, "num_chars": 18}, {"sum_logits": -8.597067832946777, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -19.960105895996094, "logits_per_token": -2.1492669582366943, "logits_per_char": -0.4093841825212751, "num_chars": 21}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 450, "native_id": "7-244", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.091732978820801, "incorrect_loss_raw": 4.556088765462239, "correct_loss_per_char": 0.8702475684029716, "incorrect_loss_per_char": 0.6021864251776056, "correct_loss_per_token": 6.091732978820801, "incorrect_loss_per_token": 3.513247807820638, "correct_loss_uncond": -7.462684631347656, "incorrect_loss_uncond": -9.735442797342936}, "model_output": [{"sum_logits": -6.091732978820801, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.554417610168457, "logits_per_token": -6.091732978820801, "logits_per_char": -0.8702475684029716, "num_chars": 7}, {"sum_logits": -6.257045745849609, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -17.64288330078125, "logits_per_token": -3.1285228729248047, "logits_per_char": -0.48131121121920073, "num_chars": 13}, {"sum_logits": -4.663789749145508, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.61186408996582, "logits_per_token": -4.663789749145508, "logits_per_char": -0.9327579498291015, "num_chars": 5}, {"sum_logits": -2.7474308013916016, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -13.619847297668457, "logits_per_token": -2.7474308013916016, "logits_per_char": -0.3924901144845145, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 451, "native_id": "9-916", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.980222225189209, "incorrect_loss_raw": 9.007275104522705, "correct_loss_per_char": 0.7254747477444735, "incorrect_loss_per_char": 0.8915211235806023, "correct_loss_per_token": 3.9901111125946045, "incorrect_loss_per_token": 7.2906928062438965, "correct_loss_uncond": -7.481892108917236, "incorrect_loss_uncond": -6.092351754506429}, "model_output": [{"sum_logits": -11.023171424865723, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.296618461608887, "logits_per_token": -11.023171424865723, "logits_per_char": -1.002106493169611, "num_chars": 11}, {"sum_logits": -7.980222225189209, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.462114334106445, "logits_per_token": -3.9901111125946045, "logits_per_char": -0.7254747477444735, "num_chars": 11}, {"sum_logits": -5.699160099029541, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.920623779296875, "logits_per_token": -5.699160099029541, "logits_per_char": -0.8141657284327916, "num_chars": 7}, {"sum_logits": -10.299493789672852, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.08163833618164, "logits_per_token": -5.149746894836426, "logits_per_char": -0.8582911491394043, "num_chars": 12}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 452, "native_id": "9-1046", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.136640548706055, "incorrect_loss_raw": 9.727484226226807, "correct_loss_per_char": 1.5227734247843425, "incorrect_loss_per_char": 0.8319111413571424, "correct_loss_per_token": 9.136640548706055, "incorrect_loss_per_token": 4.863742113113403, "correct_loss_uncond": -3.952045440673828, "incorrect_loss_uncond": -7.277331829071045}, "model_output": [{"sum_logits": -9.136640548706055, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -13.088685989379883, "logits_per_token": -9.136640548706055, "logits_per_char": -1.5227734247843425, "num_chars": 6}, {"sum_logits": -12.205708503723145, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -15.992271423339844, "logits_per_token": -6.102854251861572, "logits_per_char": -1.2205708503723145, "num_chars": 10}, {"sum_logits": -11.381914138793945, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.669355392456055, "logits_per_token": -5.690957069396973, "logits_per_char": -0.8755318568303034, "num_chars": 13}, {"sum_logits": -5.59483003616333, "num_tokens": 2, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.352821350097656, "logits_per_token": -2.797415018081665, "logits_per_char": -0.3996307168688093, "num_chars": 14}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 453, "native_id": "167", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.7141547203063965, "incorrect_loss_raw": 5.029609839121501, "correct_loss_per_char": 0.33926934003829956, "incorrect_loss_per_char": 0.458368718624115, "correct_loss_per_token": 2.7141547203063965, "incorrect_loss_per_token": 2.092426432503594, "correct_loss_uncond": -7.425917148590088, "incorrect_loss_uncond": -10.025995095570883}, "model_output": [{"sum_logits": -2.7141547203063965, "num_tokens": 1, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -10.140071868896484, "logits_per_token": -2.7141547203063965, "logits_per_char": -0.33926934003829956, "num_chars": 8}, {"sum_logits": -2.579164981842041, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -12.77659797668457, "logits_per_token": -1.2895824909210205, "logits_per_char": -0.2865738868713379, "num_chars": 9}, {"sum_logits": -7.602812767028809, "num_tokens": 3, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -16.49249839782715, "logits_per_token": -2.534270922342936, "logits_per_char": -0.47517579793930054, "num_chars": 16}, {"sum_logits": -4.906851768493652, "num_tokens": 2, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -15.89771842956543, "logits_per_token": -2.453425884246826, "logits_per_char": -0.6133564710617065, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 454, "native_id": "9-566", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.761706352233887, "incorrect_loss_raw": 9.79445711771647, "correct_loss_per_char": 0.7976066470146179, "incorrect_loss_per_char": 1.2922326017309118, "correct_loss_per_token": 4.253902117411296, "incorrect_loss_per_token": 6.738466686672634, "correct_loss_uncond": -14.656085014343262, "incorrect_loss_uncond": -5.310516357421875}, "model_output": [{"sum_logits": -9.474618911743164, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.69546127319336, "logits_per_token": -9.474618911743164, "logits_per_char": -1.579103151957194, "num_chars": 6}, {"sum_logits": -12.761706352233887, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.41779136657715, "logits_per_token": -4.253902117411296, "logits_per_char": -0.7976066470146179, "num_chars": 16}, {"sum_logits": -6.156795501708984, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -13.572891235351562, "logits_per_token": -6.156795501708984, "logits_per_char": -0.769599437713623, "num_chars": 8}, {"sum_logits": -13.751956939697266, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -18.046567916870117, "logits_per_token": -4.583985646565755, "logits_per_char": -1.5279952155219183, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 455, "native_id": "8-28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 20.144882202148438, "incorrect_loss_raw": 21.798490524291992, "correct_loss_per_char": 0.8758644435716711, "incorrect_loss_per_char": 0.811847061688116, "correct_loss_per_token": 2.23832024468316, "incorrect_loss_per_token": 2.8007334497239853, "correct_loss_uncond": -14.053672790527344, "incorrect_loss_uncond": -9.786972681681315}, "model_output": [{"sum_logits": -18.426197052001953, "num_tokens": 8, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -25.19159698486328, "logits_per_token": -2.303274631500244, "logits_per_char": -0.8375544114546343, "num_chars": 22}, {"sum_logits": -21.02997398376465, "num_tokens": 6, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -32.108943939208984, "logits_per_token": -3.504995663960775, "logits_per_char": -0.637271938901959, "num_chars": 33}, {"sum_logits": -25.939300537109375, "num_tokens": 10, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -37.455848693847656, "logits_per_token": -2.5939300537109373, "logits_per_char": -0.9607148347077547, "num_chars": 27}, {"sum_logits": -20.144882202148438, "num_tokens": 9, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -34.19855499267578, "logits_per_token": -2.23832024468316, "logits_per_char": -0.8758644435716711, "num_chars": 23}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 456, "native_id": "7-179", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.79092788696289, "incorrect_loss_raw": 12.444730917612711, "correct_loss_per_char": 2.279092788696289, "incorrect_loss_per_char": 1.4316562675294424, "correct_loss_per_token": 11.395463943481445, "incorrect_loss_per_token": 9.143587907155355, "correct_loss_uncond": -3.7499923706054688, "incorrect_loss_uncond": -4.49162753423055}, "model_output": [{"sum_logits": -10.618829727172852, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.438478469848633, "logits_per_token": -10.618829727172852, "logits_per_char": -1.3273537158966064, "num_chars": 8}, {"sum_logits": -22.79092788696289, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -26.54092025756836, "logits_per_token": -11.395463943481445, "logits_per_char": -2.279092788696289, "num_chars": 10}, {"sum_logits": -6.908504962921143, "num_tokens": 1, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -13.36446475982666, "logits_per_token": -6.908504962921143, "logits_per_char": -0.9869292804173061, "num_chars": 7}, {"sum_logits": -19.80685806274414, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.006132125854492, "logits_per_token": -9.90342903137207, "logits_per_char": -1.9806858062744142, "num_chars": 10}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 457, "native_id": "389", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.8483171463012695, "incorrect_loss_raw": 7.0191270510355634, "correct_loss_per_char": 0.8560396432876587, "incorrect_loss_per_char": 0.6373760998898342, "correct_loss_per_token": 6.8483171463012695, "incorrect_loss_per_token": 4.144886785083346, "correct_loss_uncond": -5.415348052978516, "incorrect_loss_uncond": -9.15380080540975}, "model_output": [{"sum_logits": -7.172863960266113, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -15.897981643676758, "logits_per_token": -7.172863960266113, "logits_per_char": -0.8966079950332642, "num_chars": 8}, {"sum_logits": -6.8483171463012695, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -12.263665199279785, "logits_per_token": -6.8483171463012695, "logits_per_char": -0.8560396432876587, "num_chars": 8}, {"sum_logits": -10.082773208618164, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.080965042114258, "logits_per_token": -3.360924402872721, "logits_per_char": -0.5931043063893038, "num_chars": 17}, {"sum_logits": -3.801743984222412, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": true, "sum_logits_uncond": -13.539836883544922, "logits_per_token": -1.900871992111206, "logits_per_char": -0.42241599824693465, "num_chars": 9}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 458, "native_id": "1528", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 30.491802215576172, "incorrect_loss_raw": 20.790396372477215, "correct_loss_per_char": 0.8968177122228286, "incorrect_loss_per_char": 0.7385038702694873, "correct_loss_per_token": 3.8114752769470215, "incorrect_loss_per_token": 4.158079274495442, "correct_loss_uncond": -18.647964477539062, "incorrect_loss_uncond": -14.023576100667318}, "model_output": [{"sum_logits": -17.310670852661133, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -35.15325164794922, "logits_per_token": -3.4621341705322264, "logits_per_char": -0.5969196845745218, "num_chars": 29}, {"sum_logits": -18.458194732666016, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -35.95160675048828, "logits_per_token": -3.691638946533203, "logits_per_char": -0.5954256365376134, "num_chars": 31}, {"sum_logits": -30.491802215576172, "num_tokens": 8, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -49.139766693115234, "logits_per_token": -3.8114752769470215, "logits_per_char": -0.8968177122228286, "num_chars": 34}, {"sum_logits": -26.602323532104492, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -33.337059020996094, "logits_per_token": -5.320464706420898, "logits_per_char": -1.0231662896963267, "num_chars": 26}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 459, "native_id": "1457", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.447668075561523, "incorrect_loss_raw": 14.08199946085612, "correct_loss_per_char": 0.9631778717041015, "incorrect_loss_per_char": 0.9204275647799175, "correct_loss_per_token": 7.223834037780762, "incorrect_loss_per_token": 4.693999820285374, "correct_loss_uncond": -15.817319869995117, "incorrect_loss_uncond": -10.682743708292643}, "model_output": [{"sum_logits": -11.564297676086426, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -25.075349807739258, "logits_per_token": -3.8547658920288086, "logits_per_char": -0.7709531784057617, "num_chars": 15}, {"sum_logits": -13.228127479553223, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -21.367687225341797, "logits_per_token": -4.409375826517741, "logits_per_char": -0.8267579674720764, "num_chars": 16}, {"sum_logits": -17.45357322692871, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -27.851192474365234, "logits_per_token": -5.81785774230957, "logits_per_char": -1.163571548461914, "num_chars": 15}, {"sum_logits": -14.447668075561523, "num_tokens": 2, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.26498794555664, "logits_per_token": -7.223834037780762, "logits_per_char": -0.9631778717041015, "num_chars": 15}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 460, "native_id": "1208", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 31.025341033935547, "incorrect_loss_raw": 26.58730951944987, "correct_loss_per_char": 0.6601136390199053, "incorrect_loss_per_char": 0.5988428127592976, "correct_loss_per_token": 3.1025341033935545, "incorrect_loss_per_token": 2.903257680317712, "correct_loss_uncond": -9.129024505615234, "incorrect_loss_uncond": -11.430435180664062}, "model_output": [{"sum_logits": -18.074146270751953, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -33.208595275878906, "logits_per_token": -2.5820208958217075, "logits_per_char": -0.47563542817768295, "num_chars": 38}, {"sum_logits": -31.025341033935547, "num_tokens": 10, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -40.15436553955078, "logits_per_token": -3.1025341033935545, "logits_per_char": -0.6601136390199053, "num_chars": 47}, {"sum_logits": -37.99729537963867, "num_tokens": 12, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -49.15525817871094, "logits_per_token": -3.166441281636556, "logits_per_char": -0.6440219555870962, "num_chars": 59}, {"sum_logits": -23.690486907958984, "num_tokens": 8, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -31.689380645751953, "logits_per_token": -2.961310863494873, "logits_per_char": -0.6768710545131138, "num_chars": 35}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 461, "native_id": "1170", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.662236213684082, "incorrect_loss_raw": 12.708013852437338, "correct_loss_per_char": 0.8145686785380045, "incorrect_loss_per_char": 1.0601415316263834, "correct_loss_per_token": 4.887412071228027, "incorrect_loss_per_token": 4.9922075271606445, "correct_loss_uncond": -14.94161319732666, "incorrect_loss_uncond": -5.984230359395345}, "model_output": [{"sum_logits": -13.611652374267578, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -18.204715728759766, "logits_per_token": -6.805826187133789, "logits_per_char": -1.2374229431152344, "num_chars": 11}, {"sum_logits": -10.959395408630371, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -19.78826332092285, "logits_per_token": -3.6531318028767905, "logits_per_char": -1.095939540863037, "num_chars": 10}, {"sum_logits": -14.662236213684082, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -29.603849411010742, "logits_per_token": -4.887412071228027, "logits_per_char": -0.8145686785380045, "num_chars": 18}, {"sum_logits": -13.552993774414062, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -18.08375358581543, "logits_per_token": -4.5176645914713545, "logits_per_char": -0.8470621109008789, "num_chars": 16}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 462, "native_id": "8-409", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.251807689666748, "incorrect_loss_raw": 6.287270545959473, "correct_loss_per_char": 0.8503615379333496, "incorrect_loss_per_char": 0.9791537178887261, "correct_loss_per_token": 4.251807689666748, "incorrect_loss_per_token": 4.855969111124675, "correct_loss_uncond": -7.075429439544678, "incorrect_loss_uncond": -7.899599075317383}, "model_output": [{"sum_logits": -6.440856456756592, "num_tokens": 3, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -18.773113250732422, "logits_per_token": -2.1469521522521973, "logits_per_char": -0.715650717417399, "num_chars": 9}, {"sum_logits": -4.549537181854248, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.63060188293457, "logits_per_token": -4.549537181854248, "logits_per_char": -0.9099074363708496, "num_chars": 5}, {"sum_logits": -4.251807689666748, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -11.327237129211426, "logits_per_token": -4.251807689666748, "logits_per_char": -0.8503615379333496, "num_chars": 5}, {"sum_logits": -7.871417999267578, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -11.156893730163574, "logits_per_token": -7.871417999267578, "logits_per_char": -1.3119029998779297, "num_chars": 6}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 463, "native_id": "8-307", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.459428787231445, "incorrect_loss_raw": 14.138320287068685, "correct_loss_per_char": 0.8306285858154296, "incorrect_loss_per_char": 0.69682828090978, "correct_loss_per_token": 3.1148571968078613, "incorrect_loss_per_token": 3.295010471343994, "correct_loss_uncond": -5.857484817504883, "incorrect_loss_uncond": -6.817038218180339}, "model_output": [{"sum_logits": -12.459428787231445, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -18.316913604736328, "logits_per_token": -3.1148571968078613, "logits_per_char": -0.8306285858154296, "num_chars": 15}, {"sum_logits": -13.060989379882812, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -21.609588623046875, "logits_per_token": -3.265247344970703, "logits_per_char": -0.6530494689941406, "num_chars": 20}, {"sum_logits": -14.374176025390625, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -20.903480529785156, "logits_per_token": -2.874835205078125, "logits_per_char": -0.7565355802837171, "num_chars": 19}, {"sum_logits": -14.979795455932617, "num_tokens": 4, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -20.35300636291504, "logits_per_token": -3.7449488639831543, "logits_per_char": -0.6808997934514825, "num_chars": 22}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 464, "native_id": "1948", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.982942581176758, "incorrect_loss_raw": 20.586339950561523, "correct_loss_per_char": 0.6659559408823649, "incorrect_loss_per_char": 0.5741119124977749, "correct_loss_per_token": 3.9957356452941895, "incorrect_loss_per_token": 3.846569297427223, "correct_loss_uncond": -15.45466423034668, "incorrect_loss_uncond": -13.055971145629883}, "model_output": [{"sum_logits": -28.316041946411133, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -39.23938751220703, "logits_per_token": -5.663208389282227, "logits_per_char": -0.7260523576002854, "num_chars": 39}, {"sum_logits": -14.211681365966797, "num_tokens": 7, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -28.87985610961914, "logits_per_token": -2.0302401951381137, "logits_per_char": -0.43065701108990295, "num_chars": 33}, {"sum_logits": -15.982942581176758, "num_tokens": 4, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -31.437606811523438, "logits_per_token": -3.9957356452941895, "logits_per_char": -0.6659559408823649, "num_chars": 24}, {"sum_logits": -19.23129653930664, "num_tokens": 5, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -32.80768966674805, "logits_per_token": -3.8462593078613283, "logits_per_char": -0.5656263688031364, "num_chars": 34}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 465, "native_id": "661", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 21.202823638916016, "incorrect_loss_raw": 18.777671178181965, "correct_loss_per_char": 0.8154932168813852, "incorrect_loss_per_char": 0.7579685885496814, "correct_loss_per_token": 4.2405647277832035, "incorrect_loss_per_token": 3.4905570560031465, "correct_loss_uncond": -7.365407943725586, "incorrect_loss_uncond": -6.291809717814128}, "model_output": [{"sum_logits": -16.062780380249023, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -22.855445861816406, "logits_per_token": -3.212556076049805, "logits_per_char": -0.7301263809204102, "num_chars": 22}, {"sum_logits": -16.422286987304688, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -22.435760498046875, "logits_per_token": -3.2844573974609377, "logits_per_char": -0.8211143493652344, "num_chars": 20}, {"sum_logits": -21.202823638916016, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -28.5682315826416, "logits_per_token": -4.2405647277832035, "logits_per_char": -0.8154932168813852, "num_chars": 26}, {"sum_logits": -23.847946166992188, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -29.917236328125, "logits_per_token": -3.9746576944986978, "logits_per_char": -0.7226650353633997, "num_chars": 33}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 466, "native_id": "7-435", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.462491989135742, "incorrect_loss_raw": 8.940129915873209, "correct_loss_per_char": 1.4104153315226238, "incorrect_loss_per_char": 1.7880259831746417, "correct_loss_per_token": 8.462491989135742, "incorrect_loss_per_token": 8.940129915873209, "correct_loss_uncond": -3.3965911865234375, "incorrect_loss_uncond": -3.432218551635742}, "model_output": [{"sum_logits": -9.48131275177002, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.591198921203613, "logits_per_token": -9.48131275177002, "logits_per_char": -1.8962625503540038, "num_chars": 5}, {"sum_logits": -8.502117156982422, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.723397254943848, "logits_per_token": -8.502117156982422, "logits_per_char": -1.7004234313964843, "num_chars": 5}, {"sum_logits": -8.462491989135742, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -11.85908317565918, "logits_per_token": -8.462491989135742, "logits_per_char": -1.4104153315226238, "num_chars": 6}, {"sum_logits": -8.836959838867188, "num_tokens": 1, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -12.802449226379395, "logits_per_token": -8.836959838867188, "logits_per_char": -1.7673919677734375, "num_chars": 5}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 467, "native_id": "8-332", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 19.76005744934082, "incorrect_loss_raw": 18.58354727427165, "correct_loss_per_char": 0.5645730699811663, "incorrect_loss_per_char": 0.7911436110043555, "correct_loss_per_token": 2.8228653499058316, "incorrect_loss_per_token": 3.4290015054127525, "correct_loss_uncond": -9.993131637573242, "incorrect_loss_uncond": -2.8264392217000327}, "model_output": [{"sum_logits": -27.207603454589844, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.73984718322754, "logits_per_token": -3.8868004935128346, "logits_per_char": -1.046446286714994, "num_chars": 26}, {"sum_logits": -9.858185768127441, "num_tokens": 3, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -12.768306732177734, "logits_per_token": -3.286061922709147, "logits_per_char": -0.7041561262948173, "num_chars": 14}, {"sum_logits": -19.76005744934082, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -29.753189086914062, "logits_per_token": -2.8228653499058316, "logits_per_char": -0.5645730699811663, "num_chars": 35}, {"sum_logits": -18.684852600097656, "num_tokens": 6, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -22.721805572509766, "logits_per_token": -3.114142100016276, "logits_per_char": -0.6228284200032552, "num_chars": 30}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 468, "native_id": "948", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 46.65703201293945, "incorrect_loss_raw": 32.13925298055013, "correct_loss_per_char": 0.7907971527616856, "incorrect_loss_per_char": 0.9567320472892673, "correct_loss_per_token": 3.3326451437813893, "incorrect_loss_per_token": 5.514806729271299, "correct_loss_uncond": -21.18551254272461, "incorrect_loss_uncond": -7.417198181152344}, "model_output": [{"sum_logits": -23.94304847717285, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -35.179046630859375, "logits_per_token": -4.7886096954345705, "logits_per_char": -0.8256223612818224, "num_chars": 29}, {"sum_logits": -46.65703201293945, "num_tokens": 14, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -67.84254455566406, "logits_per_token": -3.3326451437813893, "logits_per_char": -0.7907971527616856, "num_chars": 59}, {"sum_logits": -24.539907455444336, "num_tokens": 5, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -28.73440933227539, "logits_per_token": -4.907981491088867, "logits_per_char": -0.8462037053601495, "num_chars": 29}, {"sum_logits": -47.9348030090332, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -54.755897521972656, "logits_per_token": -6.8478290012904575, "logits_per_char": -1.1983700752258302, "num_chars": 40}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 469, "native_id": "381", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.111050605773926, "incorrect_loss_raw": 7.410775820414226, "correct_loss_per_char": 0.6851751009623209, "incorrect_loss_per_char": 1.1775148300897507, "correct_loss_per_token": 4.111050605773926, "incorrect_loss_per_token": 7.410775820414226, "correct_loss_uncond": -9.345404624938965, "incorrect_loss_uncond": -5.837993303934733}, "model_output": [{"sum_logits": -4.111050605773926, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -13.45645523071289, "logits_per_token": -4.111050605773926, "logits_per_char": -0.6851751009623209, "num_chars": 6}, {"sum_logits": -8.18262004852295, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -13.17258358001709, "logits_per_token": -8.18262004852295, "logits_per_char": -1.6365240097045899, "num_chars": 5}, {"sum_logits": -6.220512390136719, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -13.715497970581055, "logits_per_token": -6.220512390136719, "logits_per_char": -0.7775640487670898, "num_chars": 8}, {"sum_logits": -7.829195022583008, "num_tokens": 1, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.85822582244873, "logits_per_token": -7.829195022583008, "logits_per_char": -1.1184564317975725, "num_chars": 7}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 470, "native_id": "9-759", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 39.5379753112793, "incorrect_loss_raw": 23.026091893513996, "correct_loss_per_char": 0.9643408612507146, "incorrect_loss_per_char": 0.8951670338195047, "correct_loss_per_token": 3.95379753112793, "incorrect_loss_per_token": 4.895467599232991, "correct_loss_uncond": -9.684085845947266, "incorrect_loss_uncond": -0.5952618916829427}, "model_output": [{"sum_logits": -26.20009994506836, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -24.98406982421875, "logits_per_token": -5.240019989013672, "logits_per_char": -0.9703740720395688, "num_chars": 27}, {"sum_logits": -39.5379753112793, "num_tokens": 10, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -49.22206115722656, "logits_per_token": -3.95379753112793, "logits_per_char": -0.9643408612507146, "num_chars": 41}, {"sum_logits": -17.414953231811523, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -22.593231201171875, "logits_per_token": -4.353738307952881, "logits_per_char": -0.6965981292724609, "num_chars": 25}, {"sum_logits": -25.46322250366211, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -23.286760330200195, "logits_per_token": -5.092644500732422, "logits_per_char": -1.0185289001464843, "num_chars": 25}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 471, "native_id": "8-350", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 25.06204605102539, "incorrect_loss_raw": 17.811789512634277, "correct_loss_per_char": 0.7594559409401633, "incorrect_loss_per_char": 0.7919572294068654, "correct_loss_per_token": 4.177007675170898, "incorrect_loss_per_token": 3.4341953065660267, "correct_loss_uncond": -15.75030517578125, "incorrect_loss_uncond": -9.947365125020346}, "model_output": [{"sum_logits": -16.343612670898438, "num_tokens": 4, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -30.991506576538086, "logits_per_token": -4.085903167724609, "logits_per_char": -0.860190140573602, "num_chars": 19}, {"sum_logits": -25.06204605102539, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -40.81235122680664, "logits_per_token": -4.177007675170898, "logits_per_char": -0.7594559409401633, "num_chars": 33}, {"sum_logits": -27.662561416625977, "num_tokens": 9, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -40.32311248779297, "logits_per_token": -3.073617935180664, "logits_per_char": -0.790358897617885, "num_chars": 35}, {"sum_logits": -9.429194450378418, "num_tokens": 3, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -11.962844848632812, "logits_per_token": -3.143064816792806, "logits_per_char": -0.7253226500291091, "num_chars": 13}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 472, "native_id": "7-727", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.610015869140625, "incorrect_loss_raw": 22.923960367838543, "correct_loss_per_char": 1.884167989095052, "incorrect_loss_per_char": 1.7181711914192916, "correct_loss_per_token": 5.652503967285156, "incorrect_loss_per_token": 5.730990091959636, "correct_loss_uncond": 2.9198684692382812, "incorrect_loss_uncond": 3.901984532674154}, "model_output": [{"sum_logits": -24.54949188232422, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.49604606628418, "logits_per_token": -6.137372970581055, "logits_per_char": -1.636632792154948, "num_chars": 15}, {"sum_logits": -18.120731353759766, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.57349967956543, "logits_per_token": -4.530182838439941, "logits_per_char": -1.5100609461466472, "num_chars": 12}, {"sum_logits": -22.610015869140625, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.690147399902344, "logits_per_token": -5.652503967285156, "logits_per_char": -1.884167989095052, "num_chars": 12}, {"sum_logits": -26.10165786743164, "num_tokens": 4, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -20.996381759643555, "logits_per_token": -6.52541446685791, "logits_per_char": -2.00781983595628, "num_chars": 13}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 473, "native_id": "850", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 30.118751525878906, "incorrect_loss_raw": 29.476699193318684, "correct_loss_per_char": 0.7925987243652344, "incorrect_loss_per_char": 0.8024721960736136, "correct_loss_per_token": 3.7648439407348633, "incorrect_loss_per_token": 3.3484506730909467, "correct_loss_uncond": -9.575416564941406, "incorrect_loss_uncond": -11.075564702351889}, "model_output": [{"sum_logits": -18.19716453552246, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -34.641265869140625, "logits_per_token": -2.599594933646066, "logits_per_char": -0.606572151184082, "num_chars": 30}, {"sum_logits": -30.118751525878906, "num_tokens": 8, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -39.69416809082031, "logits_per_token": -3.7648439407348633, "logits_per_char": -0.7925987243652344, "num_chars": 38}, {"sum_logits": -49.80974197387695, "num_tokens": 11, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -55.884193420410156, "logits_per_token": -4.528158361261541, "logits_per_char": -1.27717287112505, "num_chars": 39}, {"sum_logits": -20.42319107055664, "num_tokens": 7, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -31.131332397460938, "logits_per_token": -2.9175987243652344, "logits_per_char": -0.5236715659117087, "num_chars": 39}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 474, "native_id": "970", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 16.788562774658203, "incorrect_loss_raw": 20.937023798624676, "correct_loss_per_char": 0.729937511941661, "incorrect_loss_per_char": 0.9322135689282658, "correct_loss_per_token": 3.3577125549316404, "incorrect_loss_per_token": 4.016074646843804, "correct_loss_uncond": -11.575738906860352, "incorrect_loss_uncond": -6.942281723022461}, "model_output": [{"sum_logits": -26.487258911132812, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -29.736907958984375, "logits_per_token": -5.297451782226562, "logits_per_char": -1.2039663141424006, "num_chars": 22}, {"sum_logits": -20.904102325439453, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.59855079650879, "logits_per_token": -4.180820465087891, "logits_per_char": -0.950186469338157, "num_chars": 22}, {"sum_logits": -15.419710159301758, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -23.302457809448242, "logits_per_token": -2.5699516932169595, "logits_per_char": -0.6424879233042399, "num_chars": 24}, {"sum_logits": -16.788562774658203, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -28.364301681518555, "logits_per_token": -3.3577125549316404, "logits_per_char": -0.729937511941661, "num_chars": 23}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 475, "native_id": "7-381", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.246471405029297, "incorrect_loss_raw": 7.568424224853516, "correct_loss_per_char": 0.8380277297076058, "incorrect_loss_per_char": 0.6513351422769053, "correct_loss_per_token": 4.748823801676433, "incorrect_loss_per_token": 3.784212112426758, "correct_loss_uncond": -3.0290184020996094, "incorrect_loss_uncond": -7.626125653584798}, "model_output": [{"sum_logits": -6.6776814460754395, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -12.77659797668457, "logits_per_token": -3.3388407230377197, "logits_per_char": -0.7419646051194932, "num_chars": 9}, {"sum_logits": -8.61208438873291, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -4.306042194366455, "logits_per_char": -0.7176736990610758, "num_chars": 12}, {"sum_logits": -7.415506839752197, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -17.14153289794922, "logits_per_token": -3.7077534198760986, "logits_per_char": -0.4943671226501465, "num_chars": 15}, {"sum_logits": -14.246471405029297, "num_tokens": 3, "num_tokens_all": 107, "is_greedy": false, "sum_logits_uncond": -17.275489807128906, "logits_per_token": -4.748823801676433, "logits_per_char": -0.8380277297076058, "num_chars": 17}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 476, "native_id": "9-436", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.623066902160645, "incorrect_loss_raw": 11.074879010518393, "correct_loss_per_char": 0.5660627589506262, "incorrect_loss_per_char": 1.2553203178174568, "correct_loss_per_token": 4.811533451080322, "incorrect_loss_per_token": 7.326537450154622, "correct_loss_uncond": -12.751812934875488, "incorrect_loss_uncond": -4.371532758076985}, "model_output": [{"sum_logits": -8.934675216674805, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -16.44430160522461, "logits_per_token": -4.467337608337402, "logits_per_char": -0.7445562680562338, "num_chars": 12}, {"sum_logits": -10.734587669372559, "num_tokens": 1, "num_tokens_all": 105, "is_greedy": false, "sum_logits_uncond": -12.071405410766602, "logits_per_token": -10.734587669372559, "logits_per_char": -1.7890979448954265, "num_chars": 6}, {"sum_logits": -9.623066902160645, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -22.374879837036133, "logits_per_token": -4.811533451080322, "logits_per_char": -0.5660627589506262, "num_chars": 17}, {"sum_logits": -13.555374145507812, "num_tokens": 2, "num_tokens_all": 106, "is_greedy": false, "sum_logits_uncond": -17.823528289794922, "logits_per_token": -6.777687072753906, "logits_per_char": -1.2323067405007102, "num_chars": 11}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 477, "native_id": "9-411", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.500463485717773, "incorrect_loss_raw": 8.890159606933594, "correct_loss_per_char": 0.382380205042222, "incorrect_loss_per_char": 0.4828832780859664, "correct_loss_per_token": 2.1668211619059243, "incorrect_loss_per_token": 2.9633865356445312, "correct_loss_uncond": -14.310571670532227, "incorrect_loss_uncond": -9.628011067708334}, "model_output": [{"sum_logits": -6.881736755371094, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.976978302001953, "logits_per_token": -2.2939122517903647, "logits_per_char": -0.36219667133532074, "num_chars": 19}, {"sum_logits": -8.793588638305664, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -17.08498764038086, "logits_per_token": -2.9311962127685547, "logits_per_char": -0.4396794319152832, "num_chars": 20}, {"sum_logits": -6.500463485717773, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -20.81103515625, "logits_per_token": -2.1668211619059243, "logits_per_char": -0.382380205042222, "num_chars": 17}, {"sum_logits": -10.995153427124023, "num_tokens": 3, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -20.49254608154297, "logits_per_token": -3.6650511423746743, "logits_per_char": -0.6467737310072955, "num_chars": 17}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 478, "native_id": "9-692", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.047417640686035, "incorrect_loss_raw": 8.740948994954428, "correct_loss_per_char": 1.0047417640686036, "incorrect_loss_per_char": 0.9314778096748121, "correct_loss_per_token": 3.3491392135620117, "incorrect_loss_per_token": 5.946631113688151, "correct_loss_uncond": -4.787118911743164, "incorrect_loss_uncond": -4.600516319274902}, "model_output": [{"sum_logits": -7.070223808288574, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -14.971256256103516, "logits_per_token": -3.535111904144287, "logits_per_char": -0.642747618935325, "num_chars": 11}, {"sum_logits": -10.047417640686035, "num_tokens": 3, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.8345365524292, "logits_per_token": -3.3491392135620117, "logits_per_char": -1.0047417640686036, "num_chars": 10}, {"sum_logits": -9.695683479309082, "num_tokens": 2, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -13.88058853149414, "logits_per_token": -4.847841739654541, "logits_per_char": -0.9695683479309082, "num_chars": 10}, {"sum_logits": -9.456939697265625, "num_tokens": 1, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -11.172551155090332, "logits_per_token": -9.456939697265625, "logits_per_char": -1.1821174621582031, "num_chars": 8}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 479, "native_id": "1334", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 49.44915008544922, "incorrect_loss_raw": 35.17615509033203, "correct_loss_per_char": 0.9509451939509466, "incorrect_loss_per_char": 0.7368023196770145, "correct_loss_per_token": 6.181143760681152, "incorrect_loss_per_token": 4.0241753896077475, "correct_loss_uncond": -9.516571044921875, "incorrect_loss_uncond": -7.082949320475261}, "model_output": [{"sum_logits": -49.44915008544922, "num_tokens": 8, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -58.965721130371094, "logits_per_token": -6.181143760681152, "logits_per_char": -0.9509451939509466, "num_chars": 52}, {"sum_logits": -38.872291564941406, "num_tokens": 12, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -47.49009704589844, "logits_per_token": -3.2393576304117837, "logits_per_char": -0.6372506813924821, "num_chars": 61}, {"sum_logits": -34.142906188964844, "num_tokens": 10, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -37.45859909057617, "logits_per_token": -3.4142906188964846, "logits_per_char": -0.64420577715028, "num_chars": 53}, {"sum_logits": -32.513267517089844, "num_tokens": 6, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -41.828617095947266, "logits_per_token": -5.418877919514974, "logits_per_char": -0.9289505004882812, "num_chars": 35}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 480, "native_id": "9-1160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.510053634643555, "incorrect_loss_raw": 31.623706181844074, "correct_loss_per_char": 1.250456983392889, "incorrect_loss_per_char": 0.850356728270434, "correct_loss_per_token": 5.502010726928711, "incorrect_loss_per_token": 4.763941133177125, "correct_loss_uncond": -5.559682846069336, "incorrect_loss_uncond": -3.110353469848633}, "model_output": [{"sum_logits": -26.398656845092773, "num_tokens": 7, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -29.777027130126953, "logits_per_token": -3.7712366921561107, "logits_per_char": -0.8249580264091492, "num_chars": 32}, {"sum_logits": -27.002281188964844, "num_tokens": 4, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -33.33527755737305, "logits_per_token": -6.750570297241211, "logits_per_char": -1.125095049540202, "num_chars": 24}, {"sum_logits": -41.47018051147461, "num_tokens": 11, "num_tokens_all": 124, "is_greedy": false, "sum_logits_uncond": -41.089874267578125, "logits_per_token": -3.770016410134055, "logits_per_char": -0.6010171088619508, "num_chars": 69}, {"sum_logits": -27.510053634643555, "num_tokens": 5, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -33.06973648071289, "logits_per_token": -5.502010726928711, "logits_per_char": -1.250456983392889, "num_chars": 22}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 481, "native_id": "9-89", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.726970672607422, "incorrect_loss_raw": 20.431395212809246, "correct_loss_per_char": 0.8449109021355125, "incorrect_loss_per_char": 0.5875298716242061, "correct_loss_per_token": 4.103852953229632, "incorrect_loss_per_token": 3.920034953526088, "correct_loss_uncond": -14.819965362548828, "incorrect_loss_uncond": -12.519943873087565}, "model_output": [{"sum_logits": -28.726970672607422, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -43.54693603515625, "logits_per_token": -4.103852953229632, "logits_per_char": -0.8449109021355125, "num_chars": 34}, {"sum_logits": -22.76776885986328, "num_tokens": 7, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -36.12721633911133, "logits_per_token": -3.252538408551897, "logits_per_char": -0.5837889451246995, "num_chars": 39}, {"sum_logits": -22.480754852294922, "num_tokens": 5, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -35.177940368652344, "logits_per_token": -4.496150970458984, "logits_per_char": -0.6611986721263212, "num_chars": 34}, {"sum_logits": -16.04566192626953, "num_tokens": 4, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.548860549926758, "logits_per_token": -4.011415481567383, "logits_per_char": -0.5176019976215978, "num_chars": 31}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 482, "native_id": "9-1034", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.245494842529297, "incorrect_loss_raw": 14.376128196716309, "correct_loss_per_char": 0.6291549945699757, "incorrect_loss_per_char": 0.4969463992322612, "correct_loss_per_token": 3.6490989685058595, "incorrect_loss_per_token": 2.458528085738893, "correct_loss_uncond": -10.872234344482422, "incorrect_loss_uncond": -18.11605103810628}, "model_output": [{"sum_logits": -11.750619888305664, "num_tokens": 7, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -38.17760467529297, "logits_per_token": -1.6786599840436662, "logits_per_char": -0.3916873296101888, "num_chars": 30}, {"sum_logits": -17.35886001586914, "num_tokens": 6, "num_tokens_all": 116, "is_greedy": false, "sum_logits_uncond": -28.24591827392578, "logits_per_token": -2.89314333597819, "logits_per_char": -0.5599632263183594, "num_chars": 31}, {"sum_logits": -14.018904685974121, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -31.053014755249023, "logits_per_token": -2.803780937194824, "logits_per_char": -0.5391886417682354, "num_chars": 26}, {"sum_logits": -18.245494842529297, "num_tokens": 5, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -29.11772918701172, "logits_per_token": -3.6490989685058595, "logits_per_char": -0.6291549945699757, "num_chars": 29}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 483, "native_id": "8-293", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.022716522216797, "incorrect_loss_raw": 9.601223627726236, "correct_loss_per_char": 0.668559710184733, "incorrect_loss_per_char": 0.9636302776437589, "correct_loss_per_token": 4.011358261108398, "incorrect_loss_per_token": 4.800611813863118, "correct_loss_uncond": -6.056806564331055, "incorrect_loss_uncond": -6.982534090677897}, "model_output": [{"sum_logits": -7.567646026611328, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.758235931396484, "logits_per_token": -3.783823013305664, "logits_per_char": -0.8408495585123698, "num_chars": 9}, {"sum_logits": -8.022716522216797, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.079523086547852, "logits_per_token": -4.011358261108398, "logits_per_char": -0.668559710184733, "num_chars": 12}, {"sum_logits": -13.436195373535156, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.055273056030273, "logits_per_token": -6.718097686767578, "logits_per_char": -1.4929105970594618, "num_chars": 9}, {"sum_logits": -7.799829483032227, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.937764167785645, "logits_per_token": -3.8999147415161133, "logits_per_char": -0.5571306773594448, "num_chars": 14}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 484, "native_id": "9-652", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 17.876340866088867, "incorrect_loss_raw": 22.46152941385905, "correct_loss_per_char": 0.8938170433044433, "incorrect_loss_per_char": 1.1790974084918726, "correct_loss_per_token": 4.469085216522217, "incorrect_loss_per_token": 5.615382353464763, "correct_loss_uncond": -9.289060592651367, "incorrect_loss_uncond": -9.849989573160807}, "model_output": [{"sum_logits": -25.92742156982422, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -40.61826705932617, "logits_per_token": -6.481855392456055, "logits_per_char": -1.1785191622647373, "num_chars": 22}, {"sum_logits": -24.444442749023438, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -31.32915687561035, "logits_per_token": -6.111110687255859, "logits_per_char": -1.3580245971679688, "num_chars": 18}, {"sum_logits": -17.876340866088867, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -27.165401458740234, "logits_per_token": -4.469085216522217, "logits_per_char": -0.8938170433044433, "num_chars": 20}, {"sum_logits": -17.012723922729492, "num_tokens": 4, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -24.987133026123047, "logits_per_token": -4.253180980682373, "logits_per_char": -1.0007484660429113, "num_chars": 17}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 485, "native_id": "1391", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.965532302856445, "incorrect_loss_raw": 16.298967361450195, "correct_loss_per_char": 2.9965532302856444, "incorrect_loss_per_char": 1.6792133793686375, "correct_loss_per_token": 7.491383075714111, "incorrect_loss_per_token": 7.6744064754909935, "correct_loss_uncond": -1.6656169891357422, "incorrect_loss_uncond": 0.9261290232340494}, "model_output": [{"sum_logits": -17.861095428466797, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.561737060546875, "logits_per_token": -5.953698476155599, "logits_per_char": -1.623735948042436, "num_chars": 11}, {"sum_logits": -10.08637809753418, "num_tokens": 1, "num_tokens_all": 108, "is_greedy": false, "sum_logits_uncond": -11.867706298828125, "logits_per_token": -10.08637809753418, "logits_per_char": -2.0172756195068358, "num_chars": 5}, {"sum_logits": -29.965532302856445, "num_tokens": 4, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -31.631149291992188, "logits_per_token": -7.491383075714111, "logits_per_char": -2.9965532302856444, "num_chars": 10}, {"sum_logits": -20.94942855834961, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -16.689071655273438, "logits_per_token": -6.983142852783203, "logits_per_char": -1.3966285705566406, "num_chars": 15}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 486, "native_id": "9-948", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.50136375427246, "incorrect_loss_raw": 23.946954091389973, "correct_loss_per_char": 1.0278535419040256, "incorrect_loss_per_char": 1.3802839040756225, "correct_loss_per_token": 4.625340938568115, "incorrect_loss_per_token": 5.986738522847493, "correct_loss_uncond": -7.60051155090332, "incorrect_loss_uncond": -4.544946670532227}, "model_output": [{"sum_logits": -22.388320922851562, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -27.77194595336914, "logits_per_token": -5.597080230712891, "logits_per_char": -1.3992700576782227, "num_chars": 16}, {"sum_logits": -18.50136375427246, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -26.10187530517578, "logits_per_token": -4.625340938568115, "logits_per_char": -1.0278535419040256, "num_chars": 18}, {"sum_logits": -27.936174392700195, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -31.269229888916016, "logits_per_token": -6.984043598175049, "logits_per_char": -1.3968087196350099, "num_chars": 20}, {"sum_logits": -21.516366958618164, "num_tokens": 4, "num_tokens_all": 122, "is_greedy": false, "sum_logits_uncond": -26.434526443481445, "logits_per_token": -5.379091739654541, "logits_per_char": -1.3447729349136353, "num_chars": 16}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 487, "native_id": "8-213", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.010992527008057, "incorrect_loss_raw": 7.11103089650472, "correct_loss_per_char": 1.2021985054016113, "incorrect_loss_per_char": 1.262609789106581, "correct_loss_per_token": 6.010992527008057, "incorrect_loss_per_token": 7.11103089650472, "correct_loss_uncond": -4.318643093109131, "incorrect_loss_uncond": -2.751643498738607}, "model_output": [{"sum_logits": -9.19235610961914, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -10.445201873779297, "logits_per_token": -9.19235610961914, "logits_per_char": -1.5320593516031902, "num_chars": 6}, {"sum_logits": -5.171319007873535, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -8.184921264648438, "logits_per_token": -5.171319007873535, "logits_per_char": -0.8618865013122559, "num_chars": 6}, {"sum_logits": -6.969417572021484, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -10.957900047302246, "logits_per_token": -6.969417572021484, "logits_per_char": -1.393883514404297, "num_chars": 5}, {"sum_logits": -6.010992527008057, "num_tokens": 1, "num_tokens_all": 117, "is_greedy": false, "sum_logits_uncond": -10.329635620117188, "logits_per_token": -6.010992527008057, "logits_per_char": -1.2021985054016113, "num_chars": 5}], "label": 3, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 488, "native_id": "162", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.834291934967041, "incorrect_loss_raw": 9.271767298380533, "correct_loss_per_char": 1.5668583869934083, "incorrect_loss_per_char": 1.8543534596761067, "correct_loss_per_token": 3.9171459674835205, "incorrect_loss_per_token": 4.635883649190267, "correct_loss_uncond": -7.878392696380615, "incorrect_loss_uncond": -6.367367426554362}, "model_output": [{"sum_logits": -9.937562942504883, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.281658172607422, "logits_per_token": -4.968781471252441, "logits_per_char": -1.9875125885009766, "num_chars": 5}, {"sum_logits": -7.834291934967041, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.712684631347656, "logits_per_token": -3.9171459674835205, "logits_per_char": -1.5668583869934083, "num_chars": 5}, {"sum_logits": -8.202194213867188, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -15.440977096557617, "logits_per_token": -4.101097106933594, "logits_per_char": -1.6404388427734375, "num_chars": 5}, {"sum_logits": -9.675544738769531, "num_tokens": 2, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -16.19476890563965, "logits_per_token": -4.837772369384766, "logits_per_char": -1.9351089477539063, "num_chars": 5}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 489, "native_id": "1359", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 46.35094451904297, "incorrect_loss_raw": 47.53366788228353, "correct_loss_per_char": 0.7475958793394027, "incorrect_loss_per_char": 1.0674232770681902, "correct_loss_per_token": 5.1501049465603295, "incorrect_loss_per_token": 6.30480624759008, "correct_loss_uncond": -11.810920715332031, "incorrect_loss_uncond": -5.8226884206136065}, "model_output": [{"sum_logits": -74.39022827148438, "num_tokens": 12, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -79.32756805419922, "logits_per_token": -6.199185689290364, "logits_per_char": -1.1807972741505457, "num_chars": 63}, {"sum_logits": -46.35094451904297, "num_tokens": 9, "num_tokens_all": 125, "is_greedy": false, "sum_logits_uncond": -58.161865234375, "logits_per_token": -5.1501049465603295, "logits_per_char": -0.7475958793394027, "num_chars": 62}, {"sum_logits": -40.482967376708984, "num_tokens": 7, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -49.11959457397461, "logits_per_token": -5.7832810538155695, "logits_per_char": -1.0653412467554997, "num_chars": 38}, {"sum_logits": -27.727807998657227, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -31.621906280517578, "logits_per_token": -6.931951999664307, "logits_per_char": -0.956131310298525, "num_chars": 29}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 490, "native_id": "9-743", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.770012855529785, "incorrect_loss_raw": 14.654958089192709, "correct_loss_per_char": 1.0770012855529785, "incorrect_loss_per_char": 0.9038728872934977, "correct_loss_per_token": 3.590004285176595, "incorrect_loss_per_token": 4.884986029730903, "correct_loss_uncond": -7.270981788635254, "incorrect_loss_uncond": -7.697841644287109}, "model_output": [{"sum_logits": -10.770012855529785, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -18.04099464416504, "logits_per_token": -3.590004285176595, "logits_per_char": -1.0770012855529785, "num_chars": 10}, {"sum_logits": -18.866138458251953, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -29.781246185302734, "logits_per_token": -6.288712819417317, "logits_per_char": -1.179133653640747, "num_chars": 16}, {"sum_logits": -12.429971694946289, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -19.396413803100586, "logits_per_token": -4.14332389831543, "logits_per_char": -0.8286647796630859, "num_chars": 15}, {"sum_logits": -12.668764114379883, "num_tokens": 3, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -17.880739212036133, "logits_per_token": -4.222921371459961, "logits_per_char": -0.7038202285766602, "num_chars": 18}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 491, "native_id": "9-645", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.422857284545898, "incorrect_loss_raw": 11.225593566894531, "correct_loss_per_char": 0.5542857226203469, "incorrect_loss_per_char": 0.5772265155248967, "correct_loss_per_token": 2.3557143211364746, "incorrect_loss_per_token": 2.6145106315612794, "correct_loss_uncond": -11.038061141967773, "incorrect_loss_uncond": -9.39897600809733}, "model_output": [{"sum_logits": -9.422857284545898, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -20.460918426513672, "logits_per_token": -2.3557143211364746, "logits_per_char": -0.5542857226203469, "num_chars": 17}, {"sum_logits": -8.36182975769043, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -19.36132049560547, "logits_per_token": -2.0904574394226074, "logits_per_char": -0.4400963030363384, "num_chars": 19}, {"sum_logits": -11.513265609741211, "num_tokens": 5, "num_tokens_all": 119, "is_greedy": false, "sum_logits_uncond": -21.981470108032227, "logits_per_token": -2.302653121948242, "logits_per_char": -0.4797194004058838, "num_chars": 24}, {"sum_logits": -13.801685333251953, "num_tokens": 4, "num_tokens_all": 118, "is_greedy": false, "sum_logits_uncond": -20.53091812133789, "logits_per_token": -3.4504213333129883, "logits_per_char": -0.8118638431324678, "num_chars": 17}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 492, "native_id": "8-250", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.485647201538086, "incorrect_loss_raw": 15.186797777811686, "correct_loss_per_char": 1.2971294403076172, "incorrect_loss_per_char": 1.9661405867424564, "correct_loss_per_token": 6.485647201538086, "incorrect_loss_per_token": 9.788750648498535, "correct_loss_uncond": -7.4435930252075195, "incorrect_loss_uncond": -2.3850781122843423}, "model_output": [{"sum_logits": -11.768917083740234, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.566256523132324, "logits_per_token": -11.768917083740234, "logits_per_char": -2.9422292709350586, "num_chars": 4}, {"sum_logits": -9.500264167785645, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.686440467834473, "logits_per_token": -9.500264167785645, "logits_per_char": -1.900052833557129, "num_chars": 5}, {"sum_logits": -6.485647201538086, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.929240226745605, "logits_per_token": -6.485647201538086, "logits_per_char": -1.2971294403076172, "num_chars": 5}, {"sum_logits": -24.29121208190918, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -28.46293067932129, "logits_per_token": -8.097070693969727, "logits_per_char": -1.0561396557351816, "num_chars": 23}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 493, "native_id": "283", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.80657196044922, "incorrect_loss_raw": 35.00406837463379, "correct_loss_per_char": 0.9409376332457636, "incorrect_loss_per_char": 0.8234254345599771, "correct_loss_per_token": 4.4537714640299475, "incorrect_loss_per_token": 3.7090113382490855, "correct_loss_uncond": -7.819328308105469, "incorrect_loss_uncond": -9.696828842163086}, "model_output": [{"sum_logits": -26.249467849731445, "num_tokens": 7, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -35.51642990112305, "logits_per_token": -3.749923978533064, "logits_per_char": -0.9722025129530165, "num_chars": 27}, {"sum_logits": -62.81578063964844, "num_tokens": 15, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -71.49581146240234, "logits_per_token": -4.1877187093098955, "logits_per_char": -0.8847293047837808, "num_chars": 71}, {"sum_logits": -66.80657196044922, "num_tokens": 15, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -74.62590026855469, "logits_per_token": -4.4537714640299475, "logits_per_char": -0.9409376332457636, "num_chars": 71}, {"sum_logits": -15.946956634521484, "num_tokens": 5, "num_tokens_all": 111, "is_greedy": false, "sum_logits_uncond": -27.090450286865234, "logits_per_token": -3.189391326904297, "logits_per_char": -0.6133444859431341, "num_chars": 26}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 494, "native_id": "8-183", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.758705139160156, "incorrect_loss_raw": 16.19002056121826, "correct_loss_per_char": 0.43793525695800783, "incorrect_loss_per_char": 0.8311263505595518, "correct_loss_per_token": 2.189676284790039, "incorrect_loss_per_token": 3.756245628992717, "correct_loss_uncond": -9.924884796142578, "incorrect_loss_uncond": -8.159229596455893}, "model_output": [{"sum_logits": -8.758705139160156, "num_tokens": 4, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -18.683589935302734, "logits_per_token": -2.189676284790039, "logits_per_char": -0.43793525695800783, "num_chars": 20}, {"sum_logits": -18.696731567382812, "num_tokens": 4, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -30.445140838623047, "logits_per_token": -4.674182891845703, "logits_per_char": -0.890320550827753, "num_chars": 21}, {"sum_logits": -12.397759437561035, "num_tokens": 4, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.377397537231445, "logits_per_token": -3.099439859390259, "logits_per_char": -0.729279966915355, "num_chars": 17}, {"sum_logits": -17.475570678710938, "num_tokens": 5, "num_tokens_all": 167, "is_greedy": false, "sum_logits_uncond": -23.22521209716797, "logits_per_token": -3.4951141357421873, "logits_per_char": -0.8737785339355468, "num_chars": 20}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 495, "native_id": "9-284", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.404225826263428, "incorrect_loss_raw": 6.632708628972371, "correct_loss_per_char": 0.7720322608947754, "incorrect_loss_per_char": 0.9610707759857178, "correct_loss_per_token": 2.702112913131714, "incorrect_loss_per_token": 3.3163543144861856, "correct_loss_uncond": -14.11868143081665, "incorrect_loss_uncond": -10.16386310259501}, "model_output": [{"sum_logits": -5.404225826263428, "num_tokens": 2, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -19.522907257080078, "logits_per_token": -2.702112913131714, "logits_per_char": -0.7720322608947754, "num_chars": 7}, {"sum_logits": -2.958711862564087, "num_tokens": 2, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -16.362003326416016, "logits_per_token": -1.4793559312820435, "logits_per_char": -0.49311864376068115, "num_chars": 6}, {"sum_logits": -15.26934814453125, "num_tokens": 2, "num_tokens_all": 128, "is_greedy": false, "sum_logits_uncond": -17.468961715698242, "logits_per_token": -7.634674072265625, "logits_per_char": -2.18133544921875, "num_chars": 7}, {"sum_logits": -1.6700658798217773, "num_tokens": 2, "num_tokens_all": 128, "is_greedy": true, "sum_logits_uncond": -16.55875015258789, "logits_per_token": -0.8350329399108887, "logits_per_char": -0.20875823497772217, "num_chars": 8}], "label": 0, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 496, "native_id": "7-1186", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 39.73152160644531, "incorrect_loss_raw": 10.917972882588705, "correct_loss_per_char": 1.0455663580643504, "incorrect_loss_per_char": 0.4791068049237938, "correct_loss_per_token": 5.675931658063616, "incorrect_loss_per_token": 2.1826897197299533, "correct_loss_uncond": -11.732872009277344, "incorrect_loss_uncond": -13.450050671895346}, "model_output": [{"sum_logits": -8.02039909362793, "num_tokens": 4, "num_tokens_all": 112, "is_greedy": false, "sum_logits_uncond": -19.595458984375, "logits_per_token": -2.0050997734069824, "logits_per_char": -0.4455777274237739, "num_chars": 18}, {"sum_logits": -12.112035751342773, "num_tokens": 6, "num_tokens_all": 114, "is_greedy": false, "sum_logits_uncond": -26.363386154174805, "logits_per_token": -2.0186726252237954, "logits_per_char": -0.46584752889779896, "num_chars": 26}, {"sum_logits": -39.73152160644531, "num_tokens": 7, "num_tokens_all": 115, "is_greedy": false, "sum_logits_uncond": -51.464393615722656, "logits_per_token": -5.675931658063616, "logits_per_char": -1.0455663580643504, "num_chars": 38}, {"sum_logits": -12.62148380279541, "num_tokens": 5, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -27.145225524902344, "logits_per_token": -2.524296760559082, "logits_per_char": -0.5258951584498087, "num_chars": 24}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 497, "native_id": "926", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.088302612304688, "incorrect_loss_raw": 19.051363627115887, "correct_loss_per_char": 0.580319331242488, "incorrect_loss_per_char": 0.5666641367250574, "correct_loss_per_token": 3.0176605224609374, "incorrect_loss_per_token": 2.847352543583623, "correct_loss_uncond": -15.062576293945312, "incorrect_loss_uncond": -11.93758773803711}, "model_output": [{"sum_logits": -16.964523315429688, "num_tokens": 6, "num_tokens_all": 110, "is_greedy": false, "sum_logits_uncond": -31.480873107910156, "logits_per_token": -2.8274205525716147, "logits_per_char": -0.5654841105143229, "num_chars": 30}, {"sum_logits": -14.05270767211914, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -27.013626098632812, "logits_per_token": -2.810541534423828, "logits_per_char": -0.540488756619967, "num_chars": 26}, {"sum_logits": -15.088302612304688, "num_tokens": 5, "num_tokens_all": 109, "is_greedy": false, "sum_logits_uncond": -30.15087890625, "logits_per_token": -3.0176605224609374, "logits_per_char": -0.580319331242488, "num_chars": 26}, {"sum_logits": -26.136859893798828, "num_tokens": 9, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -34.472354888916016, "logits_per_token": -2.9040955437554254, "logits_per_char": -0.5940195430408824, "num_chars": 44}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 498, "native_id": "7-519", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 24.863800048828125, "incorrect_loss_raw": 24.54314359029134, "correct_loss_per_char": 1.0810347847316577, "incorrect_loss_per_char": 0.9881470218063337, "correct_loss_per_token": 4.972760009765625, "incorrect_loss_per_token": 5.008693540663947, "correct_loss_uncond": -11.3330078125, "incorrect_loss_uncond": -5.512061436971028}, "model_output": [{"sum_logits": -29.23676872253418, "num_tokens": 5, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -29.48989486694336, "logits_per_token": -5.847353744506836, "logits_per_char": -1.1694707489013672, "num_chars": 25}, {"sum_logits": -24.863800048828125, "num_tokens": 5, "num_tokens_all": 121, "is_greedy": false, "sum_logits_uncond": -36.196807861328125, "logits_per_token": -4.972760009765625, "logits_per_char": -1.0810347847316577, "num_chars": 23}, {"sum_logits": -26.477901458740234, "num_tokens": 4, "num_tokens_all": 120, "is_greedy": false, "sum_logits_uncond": -33.354217529296875, "logits_per_token": -6.619475364685059, "logits_per_char": -0.980663016990379, "num_chars": 27}, {"sum_logits": -17.91476058959961, "num_tokens": 7, "num_tokens_all": 123, "is_greedy": false, "sum_logits_uncond": -27.321502685546875, "logits_per_token": -2.559251512799944, "logits_per_char": -0.814307299527255, "num_chars": 22}], "label": 1, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 499, "native_id": "7-7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.400174379348755, "incorrect_loss_raw": 4.424876530965169, "correct_loss_per_char": 0.680034875869751, "incorrect_loss_per_char": 0.6564055195561163, "correct_loss_per_token": 3.400174379348755, "incorrect_loss_per_token": 4.424876530965169, "correct_loss_uncond": -8.323222875595093, "incorrect_loss_uncond": -7.892494519551595}, "model_output": [{"sum_logits": -3.3041810989379883, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -12.63060188293457, "logits_per_token": -3.3041810989379883, "logits_per_char": -0.6608362197875977, "num_chars": 5}, {"sum_logits": -3.6099491119384766, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -3.6099491119384766, "logits_per_char": -0.6016581853230795, "num_chars": 6}, {"sum_logits": -3.400174379348755, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -11.723397254943848, "logits_per_token": -3.400174379348755, "logits_per_char": -0.680034875869751, "num_chars": 5}, {"sum_logits": -6.360499382019043, "num_tokens": 1, "num_tokens_all": 113, "is_greedy": false, "sum_logits_uncond": -14.43251895904541, "logits_per_token": -6.360499382019043, "logits_per_char": -0.7067221535576714, "num_chars": 9}], "label": 2, "task_hash": "bcd3c6e0e23954870d75bd4cd800afc9", "model_hash": "03418cf8091a9882619950ffb07429a5"}